--- a/gcc/addresses.h
+++ b/gcc/addresses.h
@@ -78,3 +78,42 @@
 
   return ok_for_base_p_1 (regno, mode, outer_code, index_code);
 }
+
+/* Wrapper function to unify target macros MODE_INDEX_REG_CLASS and
+   INDEX_REG_CLASS.  Arguments as for the MODE_INDEX_REG_CLASS macro.  */
+
+static inline enum reg_class
+index_reg_class (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+#ifdef MODE_INDEX_REG_CLASS
+  return MODE_INDEX_REG_CLASS (mode);
+#else
+  return INDEX_REG_CLASS;
+#endif
+}
+
+/* Wrapper function to unify target macros REGNO_MODE_OK_FOR_INDEX_P
+   and REGNO_OK_FOR_INDEX_P.  Arguments as for the
+   REGNO_MODE_OK_FOR_INDEX_P macro.  */
+
+static inline bool
+ok_for_index_p_1 (unsigned regno, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+#ifdef REGNO_MODE_OK_FOR_INDEX_P
+  return REGNO_MODE_OK_FOR_INDEX_P (regno, mode);
+#else
+  return REGNO_OK_FOR_INDEX_P (regno);
+#endif
+}
+
+/* Wrapper around ok_for_index_p_1, for use after register allocation is
+   complete.  Arguments as for the called function.  */
+
+static inline bool
+regno_ok_for_index_p (unsigned regno, enum machine_mode mode)
+{
+  if (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0)
+    regno = reg_renumber[regno];
+
+  return ok_for_index_p_1 (regno, mode);
+}
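The wrappers added above give target-independent code one way to ask "may REGNO be used as an index register for an address of mode MODE?" regardless of whether the backend defines the newer mode-aware macros or only the classic INDEX_REG_CLASS / REGNO_OK_FOR_INDEX_P. A hedged sketch of a caller; the helper below is illustrative and not part of the patch:

/* Classify REGNO as an index register for an address of mode MODE.
   After reload, regno_ok_for_index_p maps allocated pseudos to their
   hard registers before applying the target test.  */
static enum reg_class
index_class_for (unsigned int regno, enum machine_mode mode)
{
  if (regno_ok_for_index_p (regno, mode))
    return index_reg_class (mode);
  return NO_REGS;
}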
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3803,7 +3803,7 @@
      cse'ing of library calls could delete a call and leave the pop.  */
   NO_DEFER_POP;
   valreg = (mem_value == 0 && outmode != VOIDmode
-            ? hard_libcall_value (outmode) : NULL_RTX);
+            ? hard_libcall_value (outmode, orgfun) : NULL_RTX);
 
   /* Stack must be properly aligned now.  */
   gcc_assert (!(stack_pointer_delta
@@ -4048,8 +4048,17 @@
       /* We need to make a save area.  */
       unsigned int size = arg->locate.size.constant * BITS_PER_UNIT;
       enum machine_mode save_mode = mode_for_size (size, MODE_INT, 1);
-      rtx adr = memory_address (save_mode, XEXP (arg->stack_slot, 0));
-      rtx stack_area = gen_rtx_MEM (save_mode, adr);
+      rtx adr;
+      rtx stack_area;
+
+      /* We can only use save_mode if the arg is sufficiently
+         aligned.  */
+      if (STRICT_ALIGNMENT
+          && GET_MODE_ALIGNMENT (save_mode) > arg->locate.boundary)
+        save_mode = BLKmode;
+
+      adr = memory_address (save_mode, XEXP (arg->stack_slot, 0));
+      stack_area = gen_rtx_MEM (save_mode, adr);
 
       if (save_mode == BLKmode)
         {
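The calls.c hunk above only reuses the wide integer save_mode when the argument slot is aligned enough for it; on STRICT_ALIGNMENT targets an under-aligned slot falls back to a BLKmode save area. A standalone sketch of that decision rule (parameter names are illustrative, alignments in bits, not part of the patch):

#include <stdbool.h>

/* Return true when a save area of a mode requiring MODE_ALIGN bits of
   alignment may be used for a slot that only guarantees SLOT_ALIGN bits.  */
static bool
can_use_wide_save_mode (bool strict_alignment,
                        unsigned int mode_align, unsigned int slot_align)
{
  return !strict_alignment || mode_align <= slot_align;
}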
--- a/gcc/c-common.c
+++ b/gcc/c-common.c
@@ -33,7 +33,6 @@
 #include "varray.h"
 #include "expr.h"
 #include "c-common.h"
-#include "diagnostic.h"
 #include "tm_p.h"
 #include "obstack.h"
 #include "cpplib.h"
@@ -42,6 +41,7 @@
 #include "tree-inline.h"
 #include "c-tree.h"
 #include "toplev.h"
+#include "diagnostic.h"
 #include "tree-iterator.h"
 #include "hashtab.h"
 #include "tree-mudflap.h"
@@ -497,6 +497,10 @@
    This is a count, since unevaluated expressions can nest.  */
 int skip_evaluation;
 
+/* Whether lexing has been completed, so subsequent preprocessor
+   errors should use the compiler's input_location.  */
+bool done_lexing = false;
+
 /* Information about how a function name is generated.  */
 struct fname_var_t
 {
@@ -7522,6 +7526,68 @@
 #undef catenate_messages
 }
 
+/* Callback from cpp_error for PFILE to print diagnostics from the
+   preprocessor.  The diagnostic is of type LEVEL, at location
+   LOCATION unless this is after lexing and the compiler's location
+   should be used instead, with column number possibly overridden by
+   COLUMN_OVERRIDE if not zero; MSG is the translated message and AP
+   the arguments.  Returns true if a diagnostic was emitted, false
+   otherwise.  */
+
+bool
+c_cpp_error (cpp_reader *pfile ATTRIBUTE_UNUSED, int level,
+             location_t location, unsigned int column_override,
+             const char *msg, va_list *ap)
+{
+  diagnostic_info diagnostic;
+  diagnostic_t dlevel;
+  int save_warn_system_headers = warn_system_headers;
+  bool ret;
+
+  switch (level)
+    {
+    case CPP_DL_WARNING_SYSHDR:
+      if (flag_no_output)
+        return false;
+      warn_system_headers = 1;
+      /* Fall through.  */
+    case CPP_DL_WARNING:
+      if (flag_no_output)
+        return false;
+      dlevel = DK_WARNING;
+      break;
+    case CPP_DL_PEDWARN:
+      if (flag_no_output && !flag_pedantic_errors)
+        return false;
+      dlevel = DK_PEDWARN;
+      break;
+    case CPP_DL_ERROR:
+      dlevel = DK_ERROR;
+      break;
+    case CPP_DL_ICE:
+      dlevel = DK_ICE;
+      break;
+    case CPP_DL_NOTE:
+      dlevel = DK_NOTE;
+      break;
+    case CPP_DL_FATAL:
+      dlevel = DK_FATAL;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  if (done_lexing)
+    location = input_location;
+  diagnostic_set_info_translated (&diagnostic, msg, ap,
+                                  location, dlevel);
+  if (column_override)
+    diagnostic_override_column (&diagnostic, column_override);
+  ret = report_diagnostic (&diagnostic);
+  if (level == CPP_DL_WARNING_SYSHDR)
+    warn_system_headers = save_warn_system_headers;
+  return ret;
+}
+
 /* Walk a gimplified function and warn for functions whose return value is
    ignored and attribute((warn_unused_result)) is set.  This is done before
    inlining, so we don't have to worry about that.  */
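c_cpp_error above becomes the single funnel from libcpp into the compiler's diagnostic machinery. A minimal sketch of how a front end wires it up through the cpp_callbacks structure; this assumes the libcpp hook field is named error, as in this patch series, and the wrapper function itself is illustrative:

/* Sketch: route preprocessor diagnostics through c_cpp_error.  */
static void
register_cpp_diagnostics (cpp_reader *pfile)
{
  cpp_callbacks *cb = cpp_get_callbacks (pfile);
  cb->error = c_cpp_error;  /* assumed hook name in this series */
}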
--- a/gcc/c-common.h
+++ b/gcc/c-common.h
@@ -658,6 +658,11 @@
 
 extern int skip_evaluation;
 
+/* Whether lexing has been completed, so subsequent preprocessor
+   errors should use the compiler's input_location.  */
+
+extern bool done_lexing;
+
 /* C types are partitioned into three subsets: object, function, and
    incomplete types.  */
 #define C_TYPE_OBJECT_P(type) \
--- a/gcc/c-convert.c
+++ b/gcc/c-convert.c
@@ -70,6 +70,7 @@
   tree e = expr;
   enum tree_code code = TREE_CODE (type);
   const char *invalid_conv_diag;
+  tree ret;
 
   if (type == error_mark_node
       || expr == error_mark_node
@@ -85,6 +86,9 @@
 
   if (type == TREE_TYPE (expr))
     return expr;
+  ret = targetm.convert_to_type (type, expr);
+  if (ret)
+    return ret;
 
   if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (TREE_TYPE (expr)))
     return fold_convert (type, expr);
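The convert() change above gives the target a first chance at a conversion via targetm.convert_to_type; returning NULL_TREE defers to the generic code. A hedged sketch of what a hook of that shape looks like (the function below is illustrative, not the ARM implementation):

/* Example target hook: intercept no conversions, i.e. behave like the
   default hook.  A real port would return a replacement tree for the
   cases it wants to rewrite (for instance special half-precision float
   handling) and NULL_TREE otherwise.  */
static tree
example_convert_to_type (tree type ATTRIBUTE_UNUSED,
                         tree expr ATTRIBUTE_UNUSED)
{
  return NULL_TREE;
}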
--- a/gcc/c-decl.c
+++ b/gcc/c-decl.c
@@ -3994,6 +3994,7 @@
   bool bitfield = width != NULL;
   tree element_type;
   struct c_arg_info *arg_info = 0;
+  const char *errmsg;
 
   if (decl_context == FUNCDEF)
     funcdef_flag = true, decl_context = NORMAL;
@@ -4531,6 +4532,12 @@
                 error ("%qs declared as function returning an array", name);
                 type = integer_type_node;
               }
+            errmsg = targetm.invalid_return_type (type);
+            if (errmsg)
+              {
+                error (errmsg);
+                type = integer_type_node;
+              }
 
             /* Construct the function type and go to the next
                inner layer of declarator.  */
@@ -5044,6 +5051,7 @@
     {
       tree parm, type, typelt;
       unsigned int parmno;
+      const char *errmsg;
 
       /* If there is a parameter of incomplete type in a definition,
          this is an error.  In a declaration this is valid, and a
@@ -5087,6 +5095,14 @@
             }
         }
 
+      errmsg = targetm.invalid_parameter_type (type);
+      if (errmsg)
+        {
+          error (errmsg);
+          TREE_VALUE (typelt) = error_mark_node;
+          TREE_TYPE (parm) = error_mark_node;
+        }
+
       if (DECL_NAME (parm) && TREE_USED (parm))
         warn_if_shadowing (parm);
     }
@@ -8071,7 +8087,7 @@
 
   /* Don't waste time on further processing if -fsyntax-only or we've
      encountered errors.  */
-  if (flag_syntax_only || errorcount || sorrycount || cpp_errors (parse_in))
+  if (flag_syntax_only || errorcount || sorrycount)
    return;
 
   /* Close the external scope.  */
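The grokdeclarator hunks above let a backend veto whole parameter and return types through targetm.invalid_parameter_type / targetm.invalid_return_type: a non-NULL string is issued with error () and the offending type is replaced with a safe fallback. A hedged sketch of such a hook, loosely modelled on how the ARM port later rejects __fp16 in these positions (the message text and the 16-bit test are illustrative):

/* Return an error message if T is not usable as a parameter type,
   or NULL if it is acceptable.  */
static const char *
example_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}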
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -488,7 +488,8 @@
 {
   unsigned int align;
 
-  align = LOCAL_DECL_ALIGNMENT (decl);
+  align = alignment_for_aligned_arrays (TREE_TYPE (decl),
+                                        LOCAL_DECL_ALIGNMENT (decl));
 
   if (align > MAX_SUPPORTED_STACK_ALIGNMENT)
     align = MAX_SUPPORTED_STACK_ALIGNMENT;
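The cfgexpand.c change above routes every local's alignment through alignment_for_aligned_arrays, which implements the -falign-arrays rule documented in common.opt below: raise an array's alignment to the largest power of two not exceeding its total size, capped by the machine's biggest supported alignment. A standalone sketch of that computation (illustrative only, not the helper added elsewhere in this patch):

/* Alignment, in bits, that -falign-arrays would request for an array of
   SIZE_BYTES, given the current and maximum alignments in bits.  */
static unsigned int
aligned_array_alignment (unsigned long long size_bytes,
                         unsigned int current_align,
                         unsigned int biggest_align)
{
  unsigned long long pow2 = 1;
  while (pow2 * 2 <= size_bytes)
    pow2 *= 2;                       /* largest power of two <= size */
  unsigned long long bits = pow2 * 8;
  if (bits > biggest_align)
    bits = biggest_align;
  return bits > current_align ? (unsigned int) bits : current_align;
}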
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -475,9 +475,11 @@
   if (DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL)
     {
       node->origin = cgraph_node (DECL_CONTEXT (decl));
+      node->origin->ever_was_nested = 1;
       node->next_nested = node->origin->nested;
       node->origin->nested = node;
       node->master_clone = node;
+      node->ever_was_nested = 1;
     }
   if (assembler_name_hash)
     {
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -185,6 +185,8 @@
   unsigned output : 1;
   /* Set for aliases once they got through assemble_alias.  */
   unsigned alias : 1;
+  /* Set if the function is a nested function or has nested functions.  */
+  unsigned ever_was_nested : 1;
 
   /* In non-unit-at-a-time mode the function body of inline candidates is saved
      into clone before compiling so the function in original form can be
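The new ever_was_nested bit recorded in cgraph.c above is sticky: it is set on both the nested function and its containing function and never cleared, so later passes can use it as a cheap filter. A hedged usage sketch (the function below is hypothetical, not part of the patch):

/* Return true if NODE ever participated in function nesting, either as
   a nested function or as the containing function.  */
static bool
involved_in_nesting_p (const struct cgraph_node *node)
{
  return node->ever_was_nested != 0;
}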
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -153,6 +153,10 @@
 Common Var(warn_padded) Warning
 Warn when padding is required to align structure members
 
+Wpoison-system-directories
+Common Var(flag_poison_system_directories) Init(1)
+Warn for -I and -L options using system directories if cross compiling
+
 Wshadow
 Common Var(warn_shadow) Warning
 Warn when one local variable shadows another
@@ -270,6 +274,12 @@
 fabi-version=
 Common Joined UInteger Var(flag_abi_version) Init(2)
 
+falign-arrays
+Target Report Var(flag_align_arrays)
+Set the minimum alignment for array variables to be the largest power
+of two less than or equal to their total storage size, or the biggest
+alignment used on the machine, whichever is smaller.
+
 falign-functions
 Common Report Var(align_functions,0) Optimization UInteger
 Align the start of functions
@@ -467,6 +477,10 @@
 Common Report Var(flag_early_inlining) Init(1) Optimization
 Perform early inlining
 
+feglibc=
+Common Report Joined Undocumented
+EGLIBC configuration specifier, serves multilib purposes.
+
 feliminate-dwarf2-dups
 Common Report Var(flag_eliminate_dwarf2_dups)
 Perform DWARF2 duplicate elimination
@@ -895,6 +909,10 @@
 Common Report Var(flag_profile_values)
 Insert code to profile values of expressions
 
+fpromote-loop-indices
+Common Report Var(flag_promote_loop_indices) Optimization
+Promote loop indices to word-sized indices when safe
+
 frandom-seed
 Common
 
@@ -1227,6 +1245,15 @@
 Common Report Var(flag_tree_pre) Optimization
 Enable SSA-PRE optimization on trees
 
+ftree-pre-partial-partial
+Common Report Var(flag_tree_pre_partial_partial) Optimization
+In SSA-PRE optimization on trees, enable partial-partial redundancy elimination.
+
+ftree-pre-partial-partial-obliviously
+Common Report Var(flag_tree_pre_partial_partial_obliviously) Optimization
+In SSA-PRE optimization on trees, enable partial-partial redundancy
+elimination without regard for the cost of the inserted phi nodes.
+
 ftree-reassoc
 Common Report Var(flag_tree_reassoc) Init(1) Optimization
 Enable reassociation on tree level
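Each record added to common.opt above follows the usual three-part layout: the option name, a properties line (Common/Target, Var(...), Init(...), Optimization, Warning, ...), and a help string; the Var(...) property makes the option machinery emit a global flag variable. A sketch of how one of the new flags is then consumed from C code (the gate function below is illustrative):

/* Typical pass gate keyed off the variable generated for
   -fpromote-loop-indices.  */
static bool
gate_promote_loop_indices (void)
{
  return flag_promote_loop_indices != 0;
}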
--- a/gcc/config/arm/arm.c
|
||
+++ b/gcc/config/arm/arm.c
|
||
@@ -43,6 +43,7 @@
|
||
#include "optabs.h"
|
||
#include "toplev.h"
|
||
#include "recog.h"
|
||
+#include "cgraph.h"
|
||
#include "ggc.h"
|
||
#include "except.h"
|
||
#include "c-pragma.h"
|
||
@@ -53,6 +54,8 @@
|
||
#include "debug.h"
|
||
#include "langhooks.h"
|
||
#include "df.h"
|
||
+#include "intl.h"
|
||
+#include "params.h"
|
||
|
||
/* Forward definitions of types. */
|
||
typedef struct minipool_node Mnode;
|
||
@@ -110,6 +113,7 @@
|
||
static unsigned long arm_isr_value (tree);
|
||
static unsigned long arm_compute_func_type (void);
|
||
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
|
||
+static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
|
||
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
|
||
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
||
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
|
||
@@ -123,6 +127,10 @@
|
||
static int count_insns_for_constant (HOST_WIDE_INT, int);
|
||
static int arm_get_strip_length (int);
|
||
static bool arm_function_ok_for_sibcall (tree, tree);
|
||
+static bool arm_return_in_memory (const_tree, const_tree);
|
||
+static rtx arm_function_value (const_tree, const_tree, bool);
|
||
+static rtx arm_libcall_value (enum machine_mode, rtx);
|
||
+
|
||
static void arm_internal_label (FILE *, const char *, unsigned long);
|
||
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
|
||
tree);
|
||
@@ -148,6 +156,9 @@
|
||
static rtx emit_set_insn (rtx, rtx);
|
||
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
|
||
tree, bool);
|
||
+static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
|
||
+ const_tree);
|
||
+static int aapcs_select_return_coproc (const_tree, const_tree);
|
||
|
||
#ifdef OBJECT_FORMAT_ELF
|
||
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
|
||
@@ -175,6 +186,7 @@
|
||
static bool arm_output_ttype (rtx);
|
||
#endif
|
||
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
|
||
+static rtx arm_dwarf_register_span(rtx);
|
||
|
||
static tree arm_cxx_guard_type (void);
|
||
static bool arm_cxx_guard_mask_bit (void);
|
||
@@ -197,6 +209,15 @@
|
||
static int arm_issue_rate (void);
|
||
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
|
||
static bool arm_allocate_stack_slots_for_args (void);
|
||
+static bool arm_warn_func_result (void);
|
||
+static int arm_multipass_dfa_lookahead (void);
|
||
+static const char *arm_invalid_parameter_type (const_tree t);
|
||
+static const char *arm_invalid_return_type (const_tree t);
|
||
+static tree arm_promoted_type (const_tree t);
|
||
+static tree arm_convert_to_type (tree type, tree expr);
|
||
+static bool arm_scalar_mode_supported_p (enum machine_mode);
|
||
+static int arm_vector_min_alignment (const_tree type);
|
||
+static bool arm_vector_always_misalign(const_tree);
|
||
|
||
|
||
/* Initialize the GCC target structure. */
|
||
@@ -256,6 +277,12 @@
|
||
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
|
||
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
|
||
|
||
+#undef TARGET_FUNCTION_VALUE
|
||
+#define TARGET_FUNCTION_VALUE arm_function_value
|
||
+
|
||
+#undef TARGET_LIBCALL_VALUE
|
||
+#define TARGET_LIBCALL_VALUE arm_libcall_value
|
||
+
|
||
#undef TARGET_ASM_OUTPUT_MI_THUNK
|
||
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
|
||
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
|
||
@@ -299,6 +326,9 @@
|
||
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
|
||
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
|
||
|
||
+#undef TARGET_WARN_FUNC_RESULT
|
||
+#define TARGET_WARN_FUNC_RESULT arm_warn_func_result
|
||
+
|
||
#undef TARGET_DEFAULT_SHORT_ENUMS
|
||
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
|
||
|
||
@@ -353,6 +383,9 @@
|
||
#undef TARGET_ASM_TTYPE
|
||
#define TARGET_ASM_TTYPE arm_output_ttype
|
||
|
||
+#undef TARGET_CXX_TTYPE_REF_ENCODE
|
||
+#define TARGET_CXX_TTYPE_REF_ENCODE hook_cxx_ttype_ref_in_bit0
|
||
+
|
||
#undef TARGET_ARM_EABI_UNWINDER
|
||
#define TARGET_ARM_EABI_UNWINDER true
|
||
#endif /* TARGET_UNWIND_INFO */
|
||
@@ -360,6 +393,9 @@
|
||
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
|
||
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
|
||
|
||
+#undef TARGET_DWARF_REGISTER_SPAN
|
||
+#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
|
||
+
|
||
#undef TARGET_CANNOT_COPY_INSN_P
|
||
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
|
||
|
||
@@ -398,6 +434,30 @@
|
||
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
|
||
#endif
|
||
|
||
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
|
||
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD arm_multipass_dfa_lookahead
|
||
+
|
||
+#undef TARGET_INVALID_PARAMETER_TYPE
|
||
+#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
|
||
+
|
||
+#undef TARGET_INVALID_RETURN_TYPE
|
||
+#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
|
||
+
|
||
+#undef TARGET_PROMOTED_TYPE
|
||
+#define TARGET_PROMOTED_TYPE arm_promoted_type
|
||
+
|
||
+#undef TARGET_CONVERT_TO_TYPE
|
||
+#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
|
||
+
|
||
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
|
||
+#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
|
||
+
|
||
+#undef TARGET_VECTOR_MIN_ALIGNMENT
|
||
+#define TARGET_VECTOR_MIN_ALIGNMENT arm_vector_min_alignment
|
||
+
|
||
+#undef TARGET_VECTOR_ALWAYS_MISALIGN
|
||
+#define TARGET_VECTOR_ALWAYS_MISALIGN arm_vector_always_misalign
|
||
+
|
||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||
|
||
/* Obstack for minipool constant handling. */
|
||
@@ -423,18 +483,18 @@
|
||
/* The default processor used if not overridden by commandline. */
|
||
static enum processor_type arm_default_cpu = arm_none;
|
||
|
||
-/* Which floating point model to use. */
|
||
-enum arm_fp_model arm_fp_model;
|
||
-
|
||
-/* Which floating point hardware is available. */
|
||
-enum fputype arm_fpu_arch;
|
||
-
|
||
/* Which floating point hardware to schedule for. */
|
||
-enum fputype arm_fpu_tune;
|
||
+int arm_fpu_attr;
|
||
+
|
||
+/* Which floating point hardware to use. */
|
||
+const struct arm_fpu_desc *arm_fpu_desc;
|
||
|
||
/* Whether to use floating point hardware. */
|
||
enum float_abi_type arm_float_abi;
|
||
|
||
+/* Which __fp16 format to use. */
|
||
+enum arm_fp16_format_type arm_fp16_format;
|
||
+
|
||
/* Which ABI to use. */
|
||
enum arm_abi_type arm_abi;
|
||
|
||
@@ -473,9 +533,19 @@
|
||
#define FL_DIV (1 << 18) /* Hardware divide. */
|
||
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
|
||
#define FL_NEON (1 << 20) /* Neon instructions. */
|
||
+#define FL_MARVELL_F (1 << 21) /* Marvell Feroceon. */
|
||
+#define FL_ARCH7EM (1 << 22) /* Instructions present in ARMv7E-M. */
|
||
|
||
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
|
||
|
||
+/* Some flags are ignored when comparing -mcpu and -march:
|
||
+ FL_MARVELL_F so that -mcpu=marvell-f -march=v5te works.
|
||
+ FL_LDSCHED and FL_WBUF only affect tuning,
|
||
+ FL_CO_PROC, FL_VFPV2, FL_VFPV3 and FL_NEON because FP
|
||
+ coprocessors are handled separately. */
|
||
+#define FL_COMPAT (FL_MARVELL_F | FL_LDSCHED | FL_WBUF | FL_CO_PROC | \
|
||
+ FL_VFPV2 | FL_VFPV3 | FL_NEON)
|
||
+
|
||
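FL_COMPAT above lists the feature bits that are deliberately ignored when checking a -mcpu selection against an explicit -march, so combinations that differ only in tuning or FP-coprocessor details do not warn. A standalone sketch of the masked comparison that arm_override_options performs later in this file (illustrative helper, not patch code):

/* Return nonzero if two feature sets conflict once the compatibility
   bits in COMPAT_MASK are ignored.  */
static int
feature_sets_conflict (unsigned long cpu_flags, unsigned long arch_flags,
                       unsigned long compat_mask)
{
  return ((cpu_flags ^ arch_flags) & ~compat_mask) != 0;
}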
#define FL_FOR_ARCH2 FL_NOTM
|
||
#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
|
||
#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
|
||
@@ -497,6 +567,7 @@
|
||
#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
|
||
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
|
||
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
|
||
+#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
|
||
|
||
/* The bits in this mask specify which
|
||
instructions we are allowed to generate. */
|
||
@@ -533,6 +604,9 @@
|
||
/* Nonzero if instructions not present in the 'M' profile can be used. */
|
||
int arm_arch_notm = 0;
|
||
|
||
+/* Nonzero if instructions present in ARMv7E-M can be used. */
|
||
+int arm_arch7em = 0;
|
||
+
|
||
/* Nonzero if this chip can benefit from load scheduling. */
|
||
int arm_ld_sched = 0;
|
||
|
||
@@ -551,6 +625,9 @@
|
||
/* Nonzero if tuning for XScale */
|
||
int arm_tune_xscale = 0;
|
||
|
||
+/* Nonzero if tuning for Marvell Feroceon. */
|
||
+int arm_tune_marvell_f = 0;
|
||
+
|
||
/* Nonzero if we want to tune for stores that access the write-buffer.
|
||
This typically means an ARM6 or ARM7 with MMU or MPU. */
|
||
int arm_tune_wbuf = 0;
|
||
@@ -561,6 +638,9 @@
|
||
/* Nonzero if generating Thumb instructions. */
|
||
int thumb_code = 0;
|
||
|
||
+/* Nonzero if generating code for Janus2. */
|
||
+int janus2_code = 0;
|
||
+
|
||
/* Nonzero if we should define __THUMB_INTERWORK__ in the
|
||
preprocessor.
|
||
XXX This is a bit of a hack, it's intended to help work around
|
||
@@ -593,6 +673,8 @@
|
||
/* The maximum number of insns to be used when loading a constant. */
|
||
static int arm_constant_limit = 3;
|
||
|
||
+static enum arm_pcs arm_pcs_default;
|
||
+
|
||
/* For an explanation of these variables, see final_prescan_insn below. */
|
||
int arm_ccfsm_state;
|
||
/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
|
||
@@ -673,9 +755,11 @@
|
||
{"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
|
||
{"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
|
||
{"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
|
||
+ {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
|
||
{"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
|
||
{"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
|
||
{"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
|
||
+ {"marvell-f", marvell_f, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE | FL_MARVELL_F, NULL},
|
||
{NULL, arm_none, NULL, 0 , NULL}
|
||
};
|
||
|
||
@@ -705,49 +789,34 @@
|
||
|
||
/* The name of the preprocessor macro to define for this architecture. */
|
||
|
||
-char arm_arch_name[] = "__ARM_ARCH_0UNK__";
|
||
-
|
||
-struct fpu_desc
|
||
-{
|
||
- const char * name;
|
||
- enum fputype fpu;
|
||
-};
|
||
-
|
||
+#define ARM_ARCH_NAME_SIZE 25
|
||
+char arm_arch_name[ARM_ARCH_NAME_SIZE] = "__ARM_ARCH_0UNK__";
|
||
|
||
/* Available values for -mfpu=. */
|
||
|
||
-static const struct fpu_desc all_fpus[] =
|
||
+static const struct arm_fpu_desc all_fpus[] =
|
||
{
|
||
- {"fpa", FPUTYPE_FPA},
|
||
- {"fpe2", FPUTYPE_FPA_EMU2},
|
||
- {"fpe3", FPUTYPE_FPA_EMU2},
|
||
- {"maverick", FPUTYPE_MAVERICK},
|
||
- {"vfp", FPUTYPE_VFP},
|
||
- {"vfp3", FPUTYPE_VFP3},
|
||
- {"vfpv3", FPUTYPE_VFP3},
|
||
- {"vfpv3-d16", FPUTYPE_VFP3D16},
|
||
- {"neon", FPUTYPE_NEON}
|
||
+ {"fpa", ARM_FP_MODEL_FPA, 0, 0, false, false},
|
||
+ {"fpe2", ARM_FP_MODEL_FPA, 2, 0, false, false},
|
||
+ {"fpe3", ARM_FP_MODEL_FPA, 3, 0, false, false},
|
||
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, 0, false, false},
|
||
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
|
||
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
|
||
+ {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true },
|
||
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
|
||
+ {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
|
||
+ {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true },
|
||
+ {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true },
|
||
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
|
||
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
|
||
+ {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true },
|
||
+ {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true },
|
||
+ {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true },
|
||
+ {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true , true },
|
||
+ /* Compatibility aliases. */
|
||
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
|
||
};
|
||
|
||
-
|
||
-/* Floating point models used by the different hardware.
|
||
- See fputype in arm.h. */
|
||
-
|
||
-static const enum fputype fp_model_for_fpu[] =
|
||
-{
|
||
- /* No FP hardware. */
|
||
- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
|
||
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
|
||
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
|
||
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
|
||
- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
|
||
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
|
||
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
|
||
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
|
||
- ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
|
||
-};
|
||
-
|
||
-
|
||
struct float_abi
|
||
{
|
||
const char * name;
|
||
@@ -765,6 +834,23 @@
|
||
};
|
||
|
||
|
||
+struct fp16_format
|
||
+{
|
||
+ const char *name;
|
||
+ enum arm_fp16_format_type fp16_format_type;
|
||
+};
|
||
+
|
||
+
|
||
+/* Available values for -mfp16-format=. */
|
||
+
|
||
+static const struct fp16_format all_fp16_formats[] =
|
||
+{
|
||
+ {"none", ARM_FP16_FORMAT_NONE},
|
||
+ {"ieee", ARM_FP16_FORMAT_IEEE},
|
||
+ {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
|
||
+};
|
||
+
|
||
+
|
||
struct abi_name
|
||
{
|
||
const char *name;
|
||
@@ -922,6 +1008,44 @@
|
||
set_optab_libfunc (umod_optab, DImode, NULL);
|
||
set_optab_libfunc (smod_optab, SImode, NULL);
|
||
set_optab_libfunc (umod_optab, SImode, NULL);
|
||
+
|
||
+ /* Half-precision float operations. The compiler handles all operations
|
||
+ with NULL libfuncs by converting to SFmode. */
|
||
+ switch (arm_fp16_format)
|
||
+ {
|
||
+ case ARM_FP16_FORMAT_IEEE:
|
||
+ case ARM_FP16_FORMAT_ALTERNATIVE:
|
||
+
|
||
+ /* Conversions. */
|
||
+ set_conv_libfunc (trunc_optab, HFmode, SFmode,
|
||
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
||
+ ? "__gnu_f2h_ieee"
|
||
+ : "__gnu_f2h_alternative"));
|
||
+ set_conv_libfunc (sext_optab, SFmode, HFmode,
|
||
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
||
+ ? "__gnu_h2f_ieee"
|
||
+ : "__gnu_h2f_alternative"));
|
||
+
|
||
+ /* Arithmetic. */
|
||
+ set_optab_libfunc (add_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (sdiv_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (smul_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (neg_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (sub_optab, HFmode, NULL);
|
||
+
|
||
+ /* Comparisons. */
|
||
+ set_optab_libfunc (eq_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (ne_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (lt_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (le_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (ge_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (gt_optab, HFmode, NULL);
|
||
+ set_optab_libfunc (unord_optab, HFmode, NULL);
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
}
|
||
|
||
/* On AAPCS systems, this is the "struct __va_list". */
|
||
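The arm_init_libfuncs hunk above registers only the HFmode truncate/extend helpers and deliberately leaves every other HFmode optab NULL, so arithmetic and comparisons on __fp16 are widened to SFmode instead of calling a library routine. A small sketch of the naming rule used for the two conversion libcalls (illustrative helper; the names themselves come from the patch):

/* Pick the libcall name for an HFmode conversion: one pair of helpers
   per __fp16 encoding.  */
static const char *
fp16_conversion_libcall (int to_half, int ieee_format)
{
  if (to_half)
    return ieee_format ? "__gnu_f2h_ieee" : "__gnu_f2h_alternative";
  return ieee_format ? "__gnu_h2f_ieee" : "__gnu_h2f_alternative";
}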
@@ -1135,6 +1259,7 @@
|
||
arm_override_options (void)
|
||
{
|
||
unsigned i;
|
||
+ int len;
|
||
enum processor_type target_arch_cpu = arm_none;
|
||
enum processor_type selected_cpu = arm_none;
|
||
|
||
@@ -1152,7 +1277,11 @@
|
||
{
|
||
/* Set the architecture define. */
|
||
if (i != ARM_OPT_SET_TUNE)
|
||
- sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
|
||
+ {
|
||
+ len = snprintf (arm_arch_name, ARM_ARCH_NAME_SIZE,
|
||
+ "__ARM_ARCH_%s__", sel->arch);
|
||
+ gcc_assert (len < ARM_ARCH_NAME_SIZE);
|
||
+ }
|
||
|
||
/* Determine the processor core for which we should
|
||
tune code-generation. */
|
||
@@ -1178,8 +1307,8 @@
|
||
make sure that they are compatible. We only generate
|
||
a warning though, and we prefer the CPU over the
|
||
architecture. */
|
||
- if (insn_flags != 0 && (insn_flags ^ sel->flags))
|
||
- warning (0, "switch -mcpu=%s conflicts with -march= switch",
|
||
+ if (insn_flags != 0 && ((insn_flags ^ sel->flags) & ~FL_COMPAT))
|
||
+ warning (0, "switch -mcpu=%s conflicts with -march= switch, assuming CPU feature set",
|
||
ptr->string);
|
||
|
||
insn_flags = sel->flags;
|
||
@@ -1279,7 +1408,11 @@
|
||
|
||
insn_flags = sel->flags;
|
||
}
|
||
- sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
|
||
+
|
||
+ len = snprintf (arm_arch_name, ARM_ARCH_NAME_SIZE,
|
||
+ "__ARM_ARCH_%s__", sel->arch);
|
||
+ gcc_assert (len < ARM_ARCH_NAME_SIZE);
|
||
+
|
||
arm_default_cpu = (enum processor_type) (sel - all_cores);
|
||
if (arm_tune == arm_none)
|
||
arm_tune = arm_default_cpu;
|
||
@@ -1289,8 +1422,35 @@
|
||
chosen. */
|
||
gcc_assert (arm_tune != arm_none);
|
||
|
||
+ if (arm_tune == cortexa8 && optimize >= 3)
|
||
+ {
|
||
+ /* These alignments were experimentally determined to improve SPECint
|
||
+ performance on SPECCPU 2000. */
|
||
+ if (align_functions <= 0)
|
||
+ align_functions = 16;
|
||
+ if (align_jumps <= 0)
|
||
+ align_jumps = 16;
|
||
+ }
|
||
+
|
||
tune_flags = all_cores[(int)arm_tune].flags;
|
||
|
||
+ if (target_fp16_format_name)
|
||
+ {
|
||
+ for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
|
||
+ {
|
||
+ if (streq (all_fp16_formats[i].name, target_fp16_format_name))
|
||
+ {
|
||
+ arm_fp16_format = all_fp16_formats[i].fp16_format_type;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ if (i == ARRAY_SIZE (all_fp16_formats))
|
||
+ error ("invalid __fp16 format option: -mfp16-format=%s",
|
||
+ target_fp16_format_name);
|
||
+ }
|
||
+ else
|
||
+ arm_fp16_format = ARM_FP16_FORMAT_NONE;
|
||
+
|
||
if (target_abi_name)
|
||
{
|
||
for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
|
||
@@ -1383,6 +1543,7 @@
|
||
arm_arch6 = (insn_flags & FL_ARCH6) != 0;
|
||
arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
|
||
arm_arch_notm = (insn_flags & FL_NOTM) != 0;
|
||
+ arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
|
||
arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
|
||
arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
|
||
arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
|
||
@@ -1390,12 +1551,25 @@
|
||
arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
|
||
arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
|
||
thumb_code = (TARGET_ARM == 0);
|
||
+ janus2_code = (TARGET_FIX_JANUS != 0);
|
||
+ if (janus2_code && TARGET_THUMB2)
|
||
+ error ("janus2 fix is not applicable when targeting a thumb2 core");
|
||
arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
|
||
arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
|
||
+ arm_tune_marvell_f = (tune_flags & FL_MARVELL_F) != 0;
|
||
arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
|
||
- arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
|
||
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
|
||
|
||
+ /* Hardware integer division is supported by some variants of the ARM
|
||
+ architecture in Thumb-2 mode. In addition some (but not all) Marvell
|
||
+ CPUs support their own hardware integer division instructions.
|
||
+ The assembler will pick the correct encoding. */
|
||
+ if (TARGET_MARVELL_DIV && (insn_flags & FL_MARVELL_F) == 0)
|
||
+ error ("-mmarvell-div is only supported when targeting a Marvell core");
|
||
+
|
||
+ arm_arch_hwdiv = (TARGET_ARM && TARGET_MARVELL_DIV)
|
||
+ || (TARGET_THUMB2 && (insn_flags & FL_DIV) != 0);
|
||
+
|
||
/* If we are not using the default (ARM mode) section anchor offset
|
||
ranges, then set the correct ranges now. */
|
||
if (TARGET_THUMB1)
|
||
@@ -1434,7 +1608,6 @@
|
||
if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
|
||
error ("iwmmxt abi requires an iwmmxt capable cpu");
|
||
|
||
- arm_fp_model = ARM_FP_MODEL_UNKNOWN;
|
||
if (target_fpu_name == NULL && target_fpe_name != NULL)
|
||
{
|
||
if (streq (target_fpe_name, "2"))
|
||
@@ -1445,46 +1618,52 @@
|
||
error ("invalid floating point emulation option: -mfpe=%s",
|
||
target_fpe_name);
|
||
}
|
||
- if (target_fpu_name != NULL)
|
||
- {
|
||
- /* The user specified a FPU. */
|
||
- for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
|
||
- {
|
||
- if (streq (all_fpus[i].name, target_fpu_name))
|
||
- {
|
||
- arm_fpu_arch = all_fpus[i].fpu;
|
||
- arm_fpu_tune = arm_fpu_arch;
|
||
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
|
||
- break;
|
||
- }
|
||
- }
|
||
- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
|
||
- error ("invalid floating point option: -mfpu=%s", target_fpu_name);
|
||
- }
|
||
- else
|
||
+
|
||
+ if (target_fpu_name == NULL)
|
||
{
|
||
#ifdef FPUTYPE_DEFAULT
|
||
- /* Use the default if it is specified for this platform. */
|
||
- arm_fpu_arch = FPUTYPE_DEFAULT;
|
||
- arm_fpu_tune = FPUTYPE_DEFAULT;
|
||
+ target_fpu_name = FPUTYPE_DEFAULT;
|
||
#else
|
||
- /* Pick one based on CPU type. */
|
||
- /* ??? Some targets assume FPA is the default.
|
||
- if ((insn_flags & FL_VFP) != 0)
|
||
- arm_fpu_arch = FPUTYPE_VFP;
|
||
- else
|
||
- */
|
||
if (arm_arch_cirrus)
|
||
- arm_fpu_arch = FPUTYPE_MAVERICK;
|
||
+ target_fpu_name = "maverick";
|
||
else
|
||
- arm_fpu_arch = FPUTYPE_FPA_EMU2;
|
||
+ target_fpu_name = "fpe2";
|
||
#endif
|
||
- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
||
- arm_fpu_tune = FPUTYPE_FPA;
|
||
+ }
|
||
+
|
||
+ arm_fpu_desc = NULL;
|
||
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
|
||
+ {
|
||
+ if (streq (all_fpus[i].name, target_fpu_name))
|
||
+ {
|
||
+ arm_fpu_desc = &all_fpus[i];
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ if (!arm_fpu_desc)
|
||
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name);
|
||
+
|
||
+ switch (arm_fpu_desc->model)
|
||
+ {
|
||
+ case ARM_FP_MODEL_FPA:
|
||
+ if (arm_fpu_desc->rev == 2)
|
||
+ arm_fpu_attr = FPU_FPE2;
|
||
+ else if (arm_fpu_desc->rev == 3)
|
||
+ arm_fpu_attr = FPU_FPE3;
|
||
else
|
||
- arm_fpu_tune = arm_fpu_arch;
|
||
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
|
||
- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
|
||
+ arm_fpu_attr = FPU_FPA;
|
||
+ break;
|
||
+
|
||
+ case ARM_FP_MODEL_MAVERICK:
|
||
+ arm_fpu_attr = FPU_MAVERICK;
|
||
+ break;
|
||
+
|
||
+ case ARM_FP_MODEL_VFP:
|
||
+ arm_fpu_attr = FPU_VFP;
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ gcc_unreachable();
|
||
}
|
||
|
||
if (target_float_abi_name != NULL)
|
||
@@ -1505,9 +1684,6 @@
|
||
else
|
||
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
|
||
|
||
- if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
|
||
- sorry ("-mfloat-abi=hard and VFP");
|
||
-
|
||
/* FPA and iWMMXt are incompatible because the insn encodings overlap.
|
||
VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
|
||
will ever exist. GCC makes no attempt to support this combination. */
|
||
@@ -1518,15 +1694,40 @@
|
||
if (TARGET_THUMB2 && TARGET_IWMMXT)
|
||
sorry ("Thumb-2 iWMMXt");
|
||
|
||
+ /* __fp16 support currently assumes the core has ldrh. */
|
||
+ if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
|
||
+ sorry ("__fp16 and no ldrh");
|
||
+
|
||
/* If soft-float is specified then don't use FPU. */
|
||
if (TARGET_SOFT_FLOAT)
|
||
- arm_fpu_arch = FPUTYPE_NONE;
|
||
+ arm_fpu_attr = FPU_NONE;
|
||
+
|
||
+ if (TARGET_AAPCS_BASED)
|
||
+ {
|
||
+ if (arm_abi == ARM_ABI_IWMMXT)
|
||
+ arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
|
||
+ else if (arm_float_abi == ARM_FLOAT_ABI_HARD
|
||
+ && TARGET_HARD_FLOAT
|
||
+ && TARGET_VFP)
|
||
+ arm_pcs_default = ARM_PCS_AAPCS_VFP;
|
||
+ else
|
||
+ arm_pcs_default = ARM_PCS_AAPCS;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
|
||
+ sorry ("-mfloat-abi=hard and VFP");
|
||
+
|
||
+ if (arm_abi == ARM_ABI_APCS)
|
||
+ arm_pcs_default = ARM_PCS_APCS;
|
||
+ else
|
||
+ arm_pcs_default = ARM_PCS_ATPCS;
|
||
+ }
|
||
|
||
/* For arm2/3 there is no need to do any scheduling if there is only
|
||
a floating point emulator, or we are doing software floating-point. */
|
||
if ((TARGET_SOFT_FLOAT
|
||
- || arm_fpu_tune == FPUTYPE_FPA_EMU2
|
||
- || arm_fpu_tune == FPUTYPE_FPA_EMU3)
|
||
+ || (TARGET_FPA && arm_fpu_desc->rev))
|
||
&& (tune_flags & FL_MODE32) == 0)
|
||
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
|
||
|
||
@@ -1616,8 +1817,7 @@
|
||
fix_cm3_ldrd = 0;
|
||
}
|
||
|
||
- /* ??? We might want scheduling for thumb2. */
|
||
- if (TARGET_THUMB && flag_schedule_insns)
|
||
+ if (TARGET_THUMB1 && flag_schedule_insns)
|
||
{
|
||
/* Don't warn since it's on by default in -O2. */
|
||
flag_schedule_insns = 0;
|
||
@@ -1653,6 +1853,36 @@
|
||
|
||
/* Register global variables with the garbage collector. */
|
||
arm_add_gc_roots ();
|
||
+
|
||
+ if (low_irq_latency && TARGET_THUMB)
|
||
+ {
|
||
+ warning (0,
|
||
+ "-low-irq-latency has no effect when compiling for the Thumb");
|
||
+ low_irq_latency = 0;
|
||
+ }
|
||
+
|
||
+ /* CSL LOCAL */
|
||
+ /* Loop unrolling can be a substantial win. At -O2, limit to 2x
|
||
+ unrolling by default to prevent excessive code growth; at -O3,
|
||
+ limit to 4x unrolling by default. We know we are not optimizing
|
||
+ for size if this is set (see arm_optimization_options). */
|
||
+ if (flag_unroll_loops == 2)
|
||
+ {
|
||
+ if (optimize == 2)
|
||
+ {
|
||
+ flag_unroll_loops = 1;
|
||
+ if (!PARAM_SET_P (PARAM_MAX_UNROLL_TIMES))
|
||
+ set_param_value ("max-unroll-times", 2);
|
||
+ }
|
||
+ else if (optimize > 2)
|
||
+ {
|
||
+ flag_unroll_loops = 1;
|
||
+ if (!PARAM_SET_P (PARAM_MAX_UNROLL_TIMES))
|
||
+ set_param_value ("max-unroll-times", 4);
|
||
+ }
|
||
+ else
|
||
+ flag_unroll_loops = 0;
|
||
+ }
|
||
}
|
||
|
||
static void
|
||
@@ -1782,6 +2012,14 @@
|
||
return !IS_NAKED (arm_current_func_type ());
|
||
}
|
||
|
||
+static bool
|
||
+arm_warn_func_result (void)
|
||
+{
|
||
+ /* Naked functions are implemented entirely in assembly, including the
|
||
+ return sequence, so suppress warnings about this. */
|
||
+ return !IS_NAKED (arm_current_func_type ());
|
||
+}
|
||
+
|
||
|
||
/* Return 1 if it is possible to return using a single instruction.
|
||
If SIBLING is non-null, this is a test for a return before a sibling
|
||
@@ -2873,14 +3111,19 @@
|
||
|
||
/* Define how to find the value returned by a function. */
|
||
|
||
-rtx
|
||
-arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
|
||
+static rtx
|
||
+arm_function_value(const_tree type, const_tree func,
|
||
+ bool outgoing ATTRIBUTE_UNUSED)
|
||
{
|
||
enum machine_mode mode;
|
||
int unsignedp ATTRIBUTE_UNUSED;
|
||
rtx r ATTRIBUTE_UNUSED;
|
||
|
||
mode = TYPE_MODE (type);
|
||
+
|
||
+ if (TARGET_AAPCS_BASED)
|
||
+ return aapcs_allocate_return_reg (mode, type, func);
|
||
+
|
||
/* Promote integer types. */
|
||
if (INTEGRAL_TYPE_P (type))
|
||
PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
|
||
@@ -2897,7 +3140,36 @@
|
||
}
|
||
}
|
||
|
||
- return LIBCALL_VALUE(mode);
|
||
+ return LIBCALL_VALUE (mode);
|
||
+}
|
||
+
|
||
+rtx
|
||
+arm_libcall_value (enum machine_mode mode, rtx libcall)
|
||
+{
|
||
+ if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
|
||
+ && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
+ {
|
||
+ /* The following libcalls return their result in integer registers,
|
||
+ even though they return a floating point value. */
|
||
+ if (rtx_equal_p (libcall,
|
||
+ convert_optab_libfunc (sfloat_optab, mode, SImode))
|
||
+ || rtx_equal_p (libcall,
|
||
+ convert_optab_libfunc (ufloat_optab, mode, SImode))
|
||
+ || rtx_equal_p (libcall,
|
||
+ convert_optab_libfunc (sfloat_optab, mode, DImode))
|
||
+ || rtx_equal_p (libcall,
|
||
+ convert_optab_libfunc (ufloat_optab, mode, DImode))
|
||
+ || rtx_equal_p (libcall,
|
||
+ convert_optab_libfunc (trunc_optab, HFmode, SFmode))
|
||
+ || rtx_equal_p (libcall,
|
||
+ convert_optab_libfunc (sext_optab, SFmode, HFmode)))
|
||
+ return gen_rtx_REG (mode, ARG_REGISTER(1));
|
||
+
|
||
+ /* XXX There are other libcalls that return in integer registers,
|
||
+ but I think they are all handled by hard insns. */
|
||
+ }
|
||
+
|
||
+ return LIBCALL_VALUE (mode);
|
||
}
|
||
|
||
/* Determine the amount of memory needed to store the possible return
|
||
@@ -2907,10 +3179,12 @@
|
||
{
|
||
int size = 16;
|
||
|
||
- if (TARGET_ARM)
|
||
+ if (TARGET_32BIT)
|
||
{
|
||
if (TARGET_HARD_FLOAT_ABI)
|
||
{
|
||
+ if (TARGET_VFP)
|
||
+ size += 32;
|
||
if (TARGET_FPA)
|
||
size += 12;
|
||
if (TARGET_MAVERICK)
|
||
@@ -2923,27 +3197,56 @@
|
||
return size;
|
||
}
|
||
|
||
-/* Decide whether a type should be returned in memory (true)
|
||
- or in a register (false). This is called as the target hook
|
||
- TARGET_RETURN_IN_MEMORY. */
|
||
+/* Decide whether TYPE should be returned in memory (true)
|
||
+ or in a register (false). FNTYPE is the type of the function making
|
||
+ the call. */
|
||
static bool
|
||
-arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
||
+arm_return_in_memory (const_tree type, const_tree fntype)
|
||
{
|
||
HOST_WIDE_INT size;
|
||
|
||
- size = int_size_in_bytes (type);
|
||
+ size = int_size_in_bytes (type); /* Negative if not fixed size. */
|
||
+
|
||
+ if (TARGET_AAPCS_BASED)
|
||
+ {
|
||
+ /* Simple, non-aggregate types (ie not including vectors and
|
||
+ complex) are always returned in a register (or registers).
|
||
+ We don't care about which register here, so we can short-cut
|
||
+ some of the detail. */
|
||
+ if (!AGGREGATE_TYPE_P (type)
|
||
+ && TREE_CODE (type) != VECTOR_TYPE
|
||
+ && TREE_CODE (type) != COMPLEX_TYPE)
|
||
+ return false;
|
||
+
|
||
+ /* Any return value that is no larger than one word can be
|
||
+ returned in r0. */
|
||
+ if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
|
||
+ return false;
|
||
+
|
||
+ /* Check any available co-processors to see if they accept the
|
||
+ type as a register candidate (VFP, for example, can return
|
||
+ some aggregates in consecutive registers). These aren't
|
||
+ available if the call is variadic. */
|
||
+ if (aapcs_select_return_coproc (type, fntype) >= 0)
|
||
+ return false;
|
||
+
|
||
+ /* Vector values should be returned using ARM registers, not
|
||
+ memory (unless they're over 16 bytes, which will break since
|
||
+ we only have four call-clobbered registers to play with). */
|
||
+ if (TREE_CODE (type) == VECTOR_TYPE)
|
||
+ return (size < 0 || size > (4 * UNITS_PER_WORD));
|
||
+
|
||
+ /* The rest go in memory. */
|
||
+ return true;
|
||
+ }
|
||
|
||
- /* Vector values should be returned using ARM registers, not memory (unless
|
||
- they're over 16 bytes, which will break since we only have four
|
||
- call-clobbered registers to play with). */
|
||
if (TREE_CODE (type) == VECTOR_TYPE)
|
||
return (size < 0 || size > (4 * UNITS_PER_WORD));
|
||
|
||
if (!AGGREGATE_TYPE_P (type) &&
|
||
- !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
|
||
- /* All simple types are returned in registers.
|
||
- For AAPCS, complex types are treated the same as aggregates. */
|
||
- return 0;
|
||
+ (TREE_CODE (type) != VECTOR_TYPE))
|
||
+ /* All simple types are returned in registers. */
|
||
+ return false;
|
||
|
||
if (arm_abi != ARM_ABI_APCS)
|
||
{
|
||
@@ -2960,7 +3263,7 @@
|
||
the aggregate is either huge or of variable size, and in either case
|
||
we will want to return it via memory and not in a register. */
|
||
if (size < 0 || size > UNITS_PER_WORD)
|
||
- return 1;
|
||
+ return true;
|
||
|
||
if (TREE_CODE (type) == RECORD_TYPE)
|
||
{
|
||
@@ -2980,18 +3283,18 @@
|
||
continue;
|
||
|
||
if (field == NULL)
|
||
- return 0; /* An empty structure. Allowed by an extension to ANSI C. */
|
||
+ return false; /* An empty structure. Allowed by an extension to ANSI C. */
|
||
|
||
/* Check that the first field is valid for returning in a register. */
|
||
|
||
/* ... Floats are not allowed */
|
||
if (FLOAT_TYPE_P (TREE_TYPE (field)))
|
||
- return 1;
|
||
+ return true;
|
||
|
||
/* ... Aggregates that are not themselves valid for returning in
|
||
a register are not allowed. */
|
||
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
|
||
- return 1;
|
||
+ return true;
|
||
|
||
/* Now check the remaining fields, if any. Only bitfields are allowed,
|
||
since they are not addressable. */
|
||
@@ -3003,10 +3306,10 @@
|
||
continue;
|
||
|
||
if (!DECL_BIT_FIELD_TYPE (field))
|
||
- return 1;
|
||
+ return true;
|
||
}
|
||
|
||
- return 0;
|
||
+ return false;
|
||
}
|
||
|
||
if (TREE_CODE (type) == UNION_TYPE)
|
||
@@ -3023,18 +3326,18 @@
|
||
continue;
|
||
|
||
if (FLOAT_TYPE_P (TREE_TYPE (field)))
|
||
- return 1;
|
||
+ return true;
|
||
|
||
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
|
||
- return 1;
|
||
+ return true;
|
||
}
|
||
|
||
- return 0;
|
||
+ return false;
|
||
}
|
||
#endif /* not ARM_WINCE */
|
||
|
||
/* Return all other types in memory. */
|
||
- return 1;
|
||
+ return true;
|
||
}
|
||
|
||
/* Indicate whether or not words of a double are in big-endian order. */
|
||
@@ -3059,14 +3362,780 @@
|
||
return 1;
|
||
}
|
||
|
||
+const struct pcs_attribute_arg
|
||
+{
|
||
+ const char *arg;
|
||
+ enum arm_pcs value;
|
||
+} pcs_attribute_args[] =
|
||
+ {
|
||
+ {"aapcs", ARM_PCS_AAPCS},
|
||
+ {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
|
||
+ {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
|
||
+ {"atpcs", ARM_PCS_ATPCS},
|
||
+ {"apcs", ARM_PCS_APCS},
|
||
+ {NULL, ARM_PCS_UNKNOWN}
|
||
+ };
|
||
+
|
||
+static enum arm_pcs
|
||
+arm_pcs_from_attribute (tree attr)
|
||
+{
|
||
+ const struct pcs_attribute_arg *ptr;
|
||
+ const char *arg;
|
||
+
|
||
+ /* Get the value of the argument. */
|
||
+ if (TREE_VALUE (attr) == NULL_TREE
|
||
+ || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
|
||
+ return ARM_PCS_UNKNOWN;
|
||
+
|
||
+ arg = TREE_STRING_POINTER (TREE_VALUE (attr));
|
||
+
|
||
+ /* Check it against the list of known arguments. */
|
||
+ for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
|
||
+ if (streq (arg, ptr->arg))
|
||
+ return ptr->value;
|
||
+
|
||
+ /* An unrecognized PCS name. */
|
||
+ return ARM_PCS_UNKNOWN;
|
||
+}
|
||
+
|
||
+/* Get the PCS variant to use for this call. TYPE is the function's type
|
||
+ specification, DECL is the specific declaration. DECL may be null if
|
||
+ the call could be indirect or if this is a library call. */
|
||
+static enum arm_pcs
|
||
+arm_get_pcs_model (const_tree type, const_tree decl)
|
||
+{
|
||
+ bool user_convention = false;
|
||
+ enum arm_pcs user_pcs = arm_pcs_default;
|
||
+ tree attr;
|
||
+
|
||
+ gcc_assert (type);
|
||
+
|
||
+ attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
|
||
+ if (attr)
|
||
+ {
|
||
+ user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
|
||
+ user_convention = true;
|
||
+ }
|
||
+
|
||
+ if (TARGET_AAPCS_BASED)
|
||
+ {
|
||
+ /* Detect varargs functions. These always use the base rules
|
||
+ (no argument is ever a candidate for a co-processor
|
||
+ register). */
|
||
+ bool base_rules = (TYPE_ARG_TYPES (type) != 0
|
||
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
|
||
+ != void_type_node));
|
||
+
|
||
+ if (user_convention)
|
||
+ {
|
||
+ if (user_pcs > ARM_PCS_AAPCS_LOCAL)
|
||
+ sorry ("Non-AAPCS derived PCS variant");
|
||
+ else if (base_rules && user_pcs != ARM_PCS_AAPCS)
|
||
+ error ("Variadic functions must use the base AAPCS variant");
|
||
+ }
|
||
+
|
||
+ if (base_rules)
|
||
+ return ARM_PCS_AAPCS;
|
||
+ else if (user_convention)
|
||
+ return user_pcs;
|
||
+ else if (decl && flag_unit_at_a_time)
|
||
+ {
|
||
+ /* Local functions never leak outside this compilation unit,
|
||
+ so we are free to use whatever conventions are
|
||
+ appropriate. */
|
||
+ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
|
||
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
|
||
+ if (i && i->local)
|
||
+ return ARM_PCS_AAPCS_LOCAL;
|
||
+ }
|
||
+ }
|
||
+ else if (user_convention && user_pcs != arm_pcs_default)
|
||
+ sorry ("PCS variant");
|
||
+
|
||
+ /* For everything else we use the target's default. */
|
||
+ return arm_pcs_default;
|
||
+}
|
||
+
|
||
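The pcs_attribute_args table and arm_get_pcs_model above implement the function-level pcs attribute: a declaration can pin one interface to a specific AAPCS variant instead of the command-line default. A minimal usage sketch (the declaration is illustrative, not part of the patch):

/* Ask for the VFP argument/return convention for this one interface;
   every declaration and the definition must carry the same attribute.  */
double dot3 (const float *a, const float *b)
    __attribute__ ((pcs ("aapcs-vfp")));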
+
|
||
+static void
|
||
+aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
|
||
+ const_tree fntype ATTRIBUTE_UNUSED,
|
||
+ rtx libcall ATTRIBUTE_UNUSED,
|
||
+ const_tree fndecl ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ /* Record the unallocated VFP registers. */
|
||
+ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
|
||
+ pcum->aapcs_vfp_reg_alloc = 0;
|
||
+}
|
||
+
|
||
+/* Walk down the type tree of TYPE counting consecutive base elements.
|
||
+ If *MODEP is VOIDmode, then set it to the first valid floating point
|
||
+ type. If a non-floating point type is found, or if a floating point
|
||
+ type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
|
||
+ otherwise return the count in the sub-tree. */
|
||
+static int
|
||
+aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
|
||
+{
|
||
+ enum machine_mode mode;
|
||
+ HOST_WIDE_INT size;
|
||
+
|
||
+ switch (TREE_CODE (type))
|
||
+ {
|
||
+ case REAL_TYPE:
|
||
+ mode = TYPE_MODE (type);
|
||
+ if (mode != DFmode && mode != SFmode)
|
||
+ return -1;
|
||
+
|
||
+ if (*modep == VOIDmode)
|
||
+ *modep = mode;
|
||
+
|
||
+ if (*modep == mode)
|
||
+ return 1;
|
||
+
|
||
+ break;
|
||
+
|
||
+ case COMPLEX_TYPE:
|
||
+ mode = TYPE_MODE (TREE_TYPE (type));
|
||
+ if (mode != DFmode && mode != SFmode)
|
||
+ return -1;
|
||
+
|
||
+ if (*modep == VOIDmode)
|
||
+ *modep = mode;
|
||
+
|
||
+ if (*modep == mode)
|
||
+ return 2;
|
||
+
|
||
+ break;
|
||
+
|
||
+ case VECTOR_TYPE:
|
||
+ /* Use V2SImode and V4SImode as representatives of all 64-bit
|
||
+ and 128-bit vector types, whether or not those modes are
|
||
+ supported with the present options. */
|
||
+ size = int_size_in_bytes (type);
|
||
+ switch (size)
|
||
+ {
|
||
+ case 8:
|
||
+ mode = V2SImode;
|
||
+ break;
|
||
+ case 16:
|
||
+ mode = V4SImode;
|
||
+ break;
|
||
+ default:
|
||
+ return -1;
|
||
+ }
|
||
+
|
||
+ if (*modep == VOIDmode)
|
||
+ *modep = mode;
|
||
+
|
||
+ /* Vector modes are considered to be opaque: two vectors are
|
||
+ equivalent for the purposes of being homogeneous aggregates
|
||
+ if they are the same size. */
|
||
+ if (*modep == mode)
|
||
+ return 1;
|
||
+
|
||
+ break;
|
||
+
|
||
+ case ARRAY_TYPE:
|
||
+ {
|
||
+ int count;
|
||
+ tree index = TYPE_DOMAIN (type);
|
||
+
|
||
+ /* Can't handle incomplete types. */
|
||
+ if (!COMPLETE_TYPE_P(type))
|
||
+ return -1;
|
||
+
|
||
+ count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
|
||
+ if (count == -1
|
||
+ || !index
|
||
+ || !TYPE_MAX_VALUE (index)
|
||
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
|
||
+ || !TYPE_MIN_VALUE (index)
|
||
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
|
||
+ || count < 0)
|
||
+ return -1;
|
||
+
|
||
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
|
||
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
|
||
+
|
||
+ /* There must be no padding. */
|
||
+ if (!host_integerp (TYPE_SIZE (type), 1)
|
||
+ || (tree_low_cst (TYPE_SIZE (type), 1)
|
||
+ != count * GET_MODE_BITSIZE (*modep)))
|
||
+ return -1;
|
||
+
|
||
+ return count;
|
||
+ }
|
||
+
|
||
+ case RECORD_TYPE:
|
||
+ {
|
||
+ int count = 0;
|
||
+ int sub_count;
|
||
+ tree field;
|
||
+
|
||
+ /* Can't handle incomplete types. */
|
||
+ if (!COMPLETE_TYPE_P(type))
|
||
+ return -1;
|
||
+
|
||
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
|
||
+ {
|
||
+ if (TREE_CODE (field) != FIELD_DECL)
|
||
+ continue;
|
||
+
|
||
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
|
||
+ if (sub_count < 0)
|
||
+ return -1;
|
||
+ count += sub_count;
|
||
+ }
|
||
+
|
||
+ /* There must be no padding. */
|
||
+ if (!host_integerp (TYPE_SIZE (type), 1)
|
||
+ || (tree_low_cst (TYPE_SIZE (type), 1)
|
||
+ != count * GET_MODE_BITSIZE (*modep)))
|
||
+ return -1;
|
||
+
|
||
+ return count;
|
||
+ }
|
||
+
|
||
+ case UNION_TYPE:
|
||
+ case QUAL_UNION_TYPE:
|
||
+ {
|
||
+ /* These aren't very interesting except in a degenerate case. */
|
||
+ int count = 0;
|
||
+ int sub_count;
|
||
+ tree field;
|
||
+
|
||
+ /* Can't handle incomplete types. */
|
||
+ if (!COMPLETE_TYPE_P(type))
|
||
+ return -1;
|
||
+
|
||
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
|
||
+ {
|
||
+ if (TREE_CODE (field) != FIELD_DECL)
|
||
+ continue;
|
||
+
|
||
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
|
||
+ if (sub_count < 0)
|
||
+ return -1;
|
||
+ count = count > sub_count ? count : sub_count;
|
||
+ }
|
||
+
|
||
+ /* There must be no padding. */
|
||
+ if (!host_integerp (TYPE_SIZE (type), 1)
|
||
+ || (tree_low_cst (TYPE_SIZE (type), 1)
|
||
+ != count * GET_MODE_BITSIZE (*modep)))
|
||
+ return -1;
|
||
+
|
||
+ return count;
|
||
+ }
|
||
+
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ return -1;
|
||
+}
|
||
+
|
||
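aapcs_vfp_sub_candidate above is looking for AAPCS "homogeneous aggregates": up to four members, all of one floating-point or vector base mode, with no padding. Two hedged C examples of what does and does not qualify (ordinary user code, not patch code):

/* Qualifies: four SFmode elements, one base mode, no padding, so it can
   be passed or returned in consecutive VFP registers.  */
struct vec4 { float x, y, z, w; };

/* Does not qualify: the members have different base modes (SFmode and
   DFmode), so the walk returns -1 and the base PCS rules apply.  */
struct mixed { float a; double b; };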
+/* Return true if PCS_VARIANT should use VFP registers. */
|
||
+static bool
|
||
+use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
|
||
+{
|
||
+ if (pcs_variant == ARM_PCS_AAPCS_VFP)
|
||
+ return true;
|
||
+
|
||
+ if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
|
||
+ return false;
|
||
+
|
||
+ return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
|
||
+ (TARGET_VFP_DOUBLE || !is_double));
|
||
+}
|
||
+
|
||
+static bool
|
||
+aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
|
||
+ enum machine_mode mode, const_tree type,
|
||
+ int *base_mode, int *count)
|
||
+{
|
||
+ enum machine_mode new_mode = VOIDmode;
|
||
+
|
||
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
||
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||
+ {
|
||
+ *count = 1;
|
||
+ new_mode = mode;
|
||
+ }
|
||
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
|
||
+ {
|
||
+ *count = 2;
|
||
+ new_mode = (mode == DCmode ? DFmode : SFmode);
|
||
+ }
|
||
+ else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
|
||
+ {
|
||
+ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
|
||
+
|
||
+ if (ag_count > 0 && ag_count <= 4)
|
||
+ *count = ag_count;
|
||
+ else
|
||
+ return false;
|
||
+ }
|
||
+ else
|
||
+ return false;
|
||
+
|
||
+
|
||
+ if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
|
||
+ return false;
|
||
+
|
||
+ *base_mode = new_mode;
|
||
+ return true;
|
||
+}
|
||
+
|
||
+static bool
|
||
+aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
|
||
+ enum machine_mode mode, const_tree type)
|
||
+{
|
||
+ int count ATTRIBUTE_UNUSED;
|
||
+ int ag_mode ATTRIBUTE_UNUSED;
|
||
+
|
||
+ if (!use_vfp_abi (pcs_variant, false))
|
||
+ return false;
|
||
+ return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
|
||
+ &ag_mode, &count);
|
||
+}
|
||
+
|
||
+static bool
|
||
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
||
+ const_tree type)
|
||
+{
|
||
+ if (!use_vfp_abi (pcum->pcs_variant, false))
|
||
+ return false;
|
||
+
|
||
+ return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
|
||
+ &pcum->aapcs_vfp_rmode,
|
||
+ &pcum->aapcs_vfp_rcount);
|
||
+}
|
||
+
|
||
+static bool
|
||
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
||
+ const_tree type ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
|
||
+ unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
|
||
+ int regno;
|
||
+
|
||
+ for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
|
||
+ if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
|
||
+ {
|
||
+ pcum->aapcs_vfp_reg_alloc = mask << regno;
|
||
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
|
||
+ {
|
||
+ int i;
|
||
+ int rcount = pcum->aapcs_vfp_rcount;
|
||
+ int rshift = shift;
|
||
+ enum machine_mode rmode = pcum->aapcs_vfp_rmode;
|
||
+ rtx par;
|
||
+ if (!TARGET_NEON)
|
||
+ {
|
||
+ /* Avoid using unsupported vector modes. */
|
||
+ if (rmode == V2SImode)
|
||
+ rmode = DImode;
|
||
+ else if (rmode == V4SImode)
|
||
+ {
|
||
+ rmode = DImode;
|
||
+ rcount *= 2;
|
||
+ rshift /= 2;
|
||
+ }
|
||
+ }
|
||
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
|
||
+ for (i = 0; i < rcount; i++)
|
||
+ {
|
||
+ rtx tmp = gen_rtx_REG (rmode,
|
||
+ FIRST_VFP_REGNUM + regno + i * rshift);
|
||
+ tmp = gen_rtx_EXPR_LIST
|
||
+ (VOIDmode, tmp,
|
||
+ GEN_INT (i * GET_MODE_SIZE (rmode)));
|
||
+ XVECEXP (par, 0, i) = tmp;
|
||
+ }
|
||
+
|
||
+ pcum->aapcs_reg = par;
|
||
+ }
|
||
+ else
|
||
+ pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
|
||
+ return true;
|
||
+ }
|
||
+ return false;
|
||
+}
|
||
+
|
||
+static rtx
|
||
+aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
|
||
+ enum machine_mode mode,
|
||
+ const_tree type ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ if (!use_vfp_abi (pcs_variant, false))
|
||
+ return false;
|
||
+
|
||
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
|
||
+ {
|
||
+ int count;
|
||
+ int ag_mode;
|
||
+ int i;
|
||
+ rtx par;
|
||
+ int shift;
|
||
+
|
||
+ aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
|
||
+ &ag_mode, &count);
|
||
+
|
||
+ if (!TARGET_NEON)
|
||
+ {
|
||
+ if (ag_mode == V2SImode)
|
||
+ ag_mode = DImode;
|
||
+ else if (ag_mode == V4SImode)
|
||
+ {
|
||
+ ag_mode = DImode;
|
||
+ count *= 2;
|
||
+ }
|
||
+ }
|
||
+ shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
|
||
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
|
||
+ for (i = 0; i < count; i++)
|
||
+ {
|
||
+ rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
|
||
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
|
||
+ GEN_INT (i * GET_MODE_SIZE (ag_mode)));
|
||
+ XVECEXP (par, 0, i) = tmp;
|
||
+ }
|
||
+
|
||
+ return par;
|
||
+ }
|
||
+
|
||
+ return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
|
||
+}
|
||
+
|
||
+static void
|
||
+aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
|
||
+ enum machine_mode mode ATTRIBUTE_UNUSED,
|
||
+ const_tree type ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
|
||
+ pcum->aapcs_vfp_reg_alloc = 0;
|
||
+ return;
|
||
+}
|
||
+
|
||
+#define AAPCS_CP(X) \
|
||
+ { \
|
||
+ aapcs_ ## X ## _cum_init, \
|
||
+ aapcs_ ## X ## _is_call_candidate, \
|
||
+ aapcs_ ## X ## _allocate, \
|
||
+ aapcs_ ## X ## _is_return_candidate, \
|
||
+ aapcs_ ## X ## _allocate_return_reg, \
|
||
+ aapcs_ ## X ## _advance \
|
||
+ }
|
||
+
|
||
+/* Table of co-processors that can be used to pass arguments in
|
||
+ registers. Ideally no argument should be a candidate for more than
|
||
+ one co-processor table entry, but the table is processed in order
|
||
+ and stops after the first match. If that entry then fails to put
|
||
+ the argument into a co-processor register, the argument will go on
|
||
+ the stack. */
|
||
+static struct
|
||
+{
|
||
+ /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
|
||
+ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
|
||
+
|
||
+ /* Return true if an argument of mode MODE (or type TYPE if MODE is
|
||
+ BLKmode) is a candidate for this co-processor's registers; this
|
||
+ function should ignore any position-dependent state in
|
||
+ CUMULATIVE_ARGS and only use call-type dependent information. */
|
||
+ bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
||
+
|
||
+ /* Return true if the argument does get a co-processor register; it
|
||
+ should set aapcs_reg to an RTX of the register allocated as is
|
||
+ required for a return from FUNCTION_ARG. */
|
||
+ bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
||
+
|
||
+ /* Return true if a result of mode MODE (or type TYPE if MODE is
|
||
+ BLKmode) can be returned in this co-processor's registers. */
|
||
+ bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
|
||
+
|
||
+ /* Allocate and return an RTX element to hold the return type of a
|
||
+ call; this routine must not fail and will only be called if
|
||
+ is_return_candidate returned true with the same parameters. */
|
||
+ rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
|
||
+
|
||
+ /* Finish processing this argument and prepare to start processing
|
||
+ the next one. */
|
||
+ void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
||
+} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
|
||
+ {
|
||
+ AAPCS_CP(vfp)
|
||
+ };
|
||
+
|
||
+#undef AAPCS_CP
|
||
+
|
||
+static int
|
||
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
||
+ tree type)
|
||
+{
|
||
+ int i;
|
||
+
|
||
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
||
+ if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
|
||
+ return i;
|
||
+
|
||
+ return -1;
|
||
+}
|
||
+
|
||
+static int
|
||
+aapcs_select_return_coproc (const_tree type, const_tree fntype)
|
||
+{
|
||
+ /* We aren't passed a decl, so we can't check that a call is local.
|
||
+ However, it isn't clear that that would be a win anyway, since it
|
||
+ might limit some tail-calling opportunities. */
|
||
+ enum arm_pcs pcs_variant;
|
||
+
|
||
+ if (fntype)
|
||
+ {
|
||
+ const_tree fndecl = NULL_TREE;
|
||
+
|
||
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
|
||
+ {
|
||
+ fndecl = fntype;
|
||
+ fntype = TREE_TYPE (fntype);
|
||
+ }
|
||
+
|
||
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
||
+ }
|
||
+ else
|
||
+ pcs_variant = arm_pcs_default;
|
||
+
|
||
+ if (pcs_variant != ARM_PCS_AAPCS)
|
||
+ {
|
||
+ int i;
|
||
+
|
||
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
||
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
|
||
+ TYPE_MODE (type),
|
||
+ type))
|
||
+ return i;
|
||
+ }
|
||
+ return -1;
|
||
+}
|
||
+
|
||
+static rtx
|
||
+aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
|
||
+ const_tree fntype)
|
||
+{
|
||
+ /* We aren't passed a decl, so we can't check that a call is local.
|
||
+ However, it isn't clear that that would be a win anyway, since it
|
||
+ might limit some tail-calling opportunities. */
|
||
+ enum arm_pcs pcs_variant;
|
||
+
|
||
+ if (fntype)
|
||
+ {
|
||
+ const_tree fndecl = NULL_TREE;
|
||
+
|
||
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
|
||
+ {
|
||
+ fndecl = fntype;
|
||
+ fntype = TREE_TYPE (fntype);
|
||
+ }
|
||
+
|
||
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
||
+ }
|
||
+ else
|
||
+ pcs_variant = arm_pcs_default;
|
||
+
|
||
+ /* Promote integer types. */
|
||
+ if (type && INTEGRAL_TYPE_P (type))
|
||
+ PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
|
||
+
|
||
+ if (pcs_variant != ARM_PCS_AAPCS)
|
||
+ {
|
||
+ int i;
|
||
+
|
||
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
||
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
|
||
+ type))
|
||
+ return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
|
||
+ mode, type);
|
||
+ }
|
||
+
|
||
+ /* Promotes small structs returned in a register to full-word size
|
||
+ for big-endian AAPCS. */
|
||
+ if (type && arm_return_in_msb (type))
|
||
+ {
|
||
+ HOST_WIDE_INT size = int_size_in_bytes (type);
|
||
+ if (size % UNITS_PER_WORD != 0)
|
||
+ {
|
||
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD;
|
||
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return gen_rtx_REG (mode, R0_REGNUM);
|
||
+}
|
||
+
|
||
+rtx
|
||
+aapcs_libcall_value (enum machine_mode mode)
|
||
+{
|
||
+ return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
|
||
+}
|
||
+
|
||
+/* Lay out a function argument using the AAPCS rules. The rule
|
||
+ numbers referred to here are those in the AAPCS. */
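+/* A worked illustration of the rules below (a reading of this code, not
+ normative AAPCS text): with one core register already used, a
+ doubleword-aligned argument such as a base-AAPCS "double" hits rule C3,
+ so the NCRN is rounded up from 1 to 2 and the value is then passed in
+ r2-r3 under rule C4. */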
|
||
+static void
|
||
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
||
+ tree type, int named)
|
||
+{
|
||
+ int nregs, nregs2;
|
||
+ int ncrn;
|
||
+
|
||
+ /* We only need to do this once per argument. */
|
||
+ if (pcum->aapcs_arg_processed)
|
||
+ return;
|
||
+
|
||
+ pcum->aapcs_arg_processed = true;
|
||
+
|
||
+ /* Special case: if named is false then we are handling an incoming
|
||
+ anonymous argument which is on the stack. */
|
||
+ if (!named)
|
||
+ return;
|
||
+
|
||
+ /* Is this a potential co-processor register candidate? */
|
||
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
|
||
+ {
|
||
+ int slot = aapcs_select_call_coproc (pcum, mode, type);
|
||
+ pcum->aapcs_cprc_slot = slot;
|
||
+
|
||
+ /* We don't have to apply any of the rules from part B of the
|
||
+ preparation phase; these are handled elsewhere in the
|
||
+ compiler. */
|
||
+
|
||
+ if (slot >= 0)
|
||
+ {
|
||
+ /* A Co-processor register candidate goes either in its own
|
||
+ class of registers or on the stack. */
|
||
+ if (!pcum->aapcs_cprc_failed[slot])
|
||
+ {
|
||
+ /* C1.cp - Try to allocate the argument to co-processor
|
||
+ registers. */
|
||
+ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
|
||
+ return;
|
||
+
|
||
+ /* C2.cp - Put the argument on the stack and note that we
|
||
+ can't assign any more candidates in this slot. We also
|
||
+ need to note that we have allocated stack space, so that
|
||
+ we won't later try to split a non-cprc candidate between
|
||
+ core registers and the stack. */
|
||
+ pcum->aapcs_cprc_failed[slot] = true;
|
||
+ pcum->can_split = false;
|
||
+ }
|
||
+
|
||
+ /* We didn't get a register, so this argument goes on the
|
||
+ stack. */
|
||
+ gcc_assert (pcum->can_split == false);
|
||
+ return;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* C3 - For double-word aligned arguments, round the NCRN up to the
|
||
+ next even number. */
|
||
+ ncrn = pcum->aapcs_ncrn;
|
||
+ if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
|
||
+ ncrn++;
|
||
+
|
||
+ nregs = ARM_NUM_REGS2(mode, type);
|
||
+
|
||
+ /* Sigh, this test should really assert that nregs > 0, but a GCC
|
||
+ extension allows empty structs and then gives them empty size; it
|
||
+ then allows such a structure to be passed by value. For some of
|
||
+ the code below we have to pretend that such an argument has
|
||
+ non-zero size so that we 'locate' it correctly either in
|
||
+ registers or on the stack. */
|
||
+ gcc_assert (nregs >= 0);
|
||
+
|
||
+ nregs2 = nregs ? nregs : 1;
|
||
+
|
||
+ /* C4 - Argument fits entirely in core registers. */
|
||
+ if (ncrn + nregs2 <= NUM_ARG_REGS)
|
||
+ {
|
||
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
|
||
+ pcum->aapcs_next_ncrn = ncrn + nregs;
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ /* C5 - Some core registers left and there are no arguments already
|
||
+ on the stack: split this argument between the remaining core
|
||
+ registers and the stack. */
|
||
+ if (ncrn < NUM_ARG_REGS && pcum->can_split)
|
||
+ {
|
||
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
|
||
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
|
||
+ pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ /* C6 - NCRN is set to 4. */
|
||
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
|
||
+
|
||
+ /* C7,C8 - argument goes on the stack. We have nothing to do here. */
|
||
+ return;
|
||
+}
|
||
+
|
||
/* Initialize a variable CUM of type CUMULATIVE_ARGS
|
||
for a call to a function whose data type is FNTYPE.
|
||
For a library call, FNTYPE is NULL. */
|
||
void
|
||
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
|
||
- rtx libname ATTRIBUTE_UNUSED,
|
||
+ rtx libname,
|
||
tree fndecl ATTRIBUTE_UNUSED)
|
||
{
|
||
+ /* Long call handling. */
|
||
+ if (fntype)
|
||
+ pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
||
+ else
|
||
+ pcum->pcs_variant = arm_pcs_default;
|
||
+
|
||
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
||
+ {
|
||
+ /* XXX We should also detect some library calls here and handle
|
||
+ them using the base rules too; for example the floating point
|
||
+ support functions always work this way. */
|
||
+
|
||
+ if (rtx_equal_p (libname,
|
||
+ convert_optab_libfunc (sfix_optab, DImode, DFmode))
|
||
+ || rtx_equal_p (libname,
|
||
+ convert_optab_libfunc (ufix_optab, DImode, DFmode))
|
||
+ || rtx_equal_p (libname,
|
||
+ convert_optab_libfunc (sfix_optab, DImode, SFmode))
|
||
+ || rtx_equal_p (libname,
|
||
+ convert_optab_libfunc (ufix_optab, DImode, SFmode))
|
||
+ || rtx_equal_p (libname,
|
||
+ convert_optab_libfunc (trunc_optab, HFmode, SFmode))
|
||
+ || rtx_equal_p (libname,
|
||
+ convert_optab_libfunc (sext_optab, SFmode, HFmode)))
|
||
+ pcum->pcs_variant = ARM_PCS_AAPCS;
|
||
+
|
||
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
|
||
+ pcum->aapcs_reg = NULL_RTX;
|
||
+ pcum->aapcs_partial = 0;
|
||
+ pcum->aapcs_arg_processed = false;
|
||
+ pcum->aapcs_cprc_slot = -1;
|
||
+ pcum->can_split = true;
|
||
+
|
||
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
|
||
+ {
|
||
+ int i;
|
||
+
|
||
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
||
+ {
|
||
+ pcum->aapcs_cprc_failed[i] = false;
|
||
+ aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
|
||
+ }
|
||
+ }
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ /* Legacy ABIs */
|
||
+
|
||
/* On the ARM, the offset starts at 0. */
|
||
pcum->nregs = 0;
|
||
pcum->iwmmxt_nregs = 0;
|
||
@@ -3120,6 +4189,17 @@
|
||
{
|
||
int nregs;
|
||
|
||
+ /* Handle the special case quickly. Pick an arbitrary value for op2 of
|
||
+ a call insn (op3 of a call_value insn). */
|
||
+ if (mode == VOIDmode)
|
||
+ return const0_rtx;
|
||
+
|
||
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
||
+ {
|
||
+ aapcs_layout_arg (pcum, mode, type, named);
|
||
+ return pcum->aapcs_reg;
|
||
+ }
|
||
+
|
||
/* Varargs vectors are treated the same as long long.
|
||
named_count avoids having to change the way arm handles 'named' */
|
||
if (TARGET_IWMMXT_ABI
|
||
@@ -3161,10 +4241,16 @@
|
||
|
||
static int
|
||
arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
||
- tree type, bool named ATTRIBUTE_UNUSED)
|
||
+ tree type, bool named)
|
||
{
|
||
int nregs = pcum->nregs;
|
||
|
||
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
||
+ {
|
||
+ aapcs_layout_arg (pcum, mode, type, named);
|
||
+ return pcum->aapcs_partial;
|
||
+ }
|
||
+
|
||
if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
|
||
return 0;
|
||
|
||
@@ -3176,6 +4262,39 @@
|
||
return 0;
|
||
}
|
||
|
||
+void
|
||
+arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
||
+ tree type, bool named)
|
||
+{
|
||
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
||
+ {
|
||
+ aapcs_layout_arg (pcum, mode, type, named);
|
||
+
|
||
+ if (pcum->aapcs_cprc_slot >= 0)
|
||
+ {
|
||
+ aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
|
||
+ type);
|
||
+ pcum->aapcs_cprc_slot = -1;
|
||
+ }
|
||
+
|
||
+ /* Generic stuff. */
|
||
+ pcum->aapcs_arg_processed = false;
|
||
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
|
||
+ pcum->aapcs_reg = NULL_RTX;
|
||
+ pcum->aapcs_partial = 0;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ pcum->nargs += 1;
|
||
+ if (arm_vector_mode_supported_p (mode)
|
||
+ && pcum->named_count > pcum->nargs
|
||
+ && TARGET_IWMMXT_ABI)
|
||
+ pcum->iwmmxt_nregs += 1;
|
||
+ else
|
||
+ pcum->nregs += ARM_NUM_REGS2 (mode, type);
|
||
+ }
|
||
+}
|
||
+
|
||
/* Variable sized types are passed by reference. This is a GCC
|
||
extension to the ARM ABI. */
|
||
|
||
@@ -3226,6 +4345,8 @@
|
||
/* Whereas these functions are always known to reside within the 26 bit
|
||
addressing range. */
|
||
{ "short_call", 0, 0, false, true, true, NULL },
|
||
+ /* Specify the procedure call conventions for a function. */
|
||
+ { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
|
||
/* Interrupt Service Routines have special prologue and epilogue requirements. */
|
||
{ "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
|
||
{ "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
|
||
@@ -3328,6 +4449,21 @@
|
||
return NULL_TREE;
|
||
}
|
||
|
||
+/* Handle a "pcs" attribute; arguments as in struct
|
||
+ attribute_spec.handler. */
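+/* For example, a declaration such as
+ int f (int) __attribute__((pcs("aapcs-vfp")));
+ selects the VFP variant of the AAPCS; an unrecognized variant name is
+ diagnosed below and the attribute is ignored. */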
|
||
+static tree
|
||
+arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
|
||
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
|
||
+{
|
||
+ if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
|
||
+ {
|
||
+ warning (OPT_Wattributes, "%qs attribute ignored",
|
||
+ IDENTIFIER_POINTER (name));
|
||
+ *no_add_attrs = true;
|
||
+ }
|
||
+ return NULL_TREE;
|
||
+}
|
||
+
|
||
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
||
/* Handle the "notshared" attribute. This attribute is another way of
|
||
requesting hidden visibility. ARM's compiler supports
|
||
@@ -3489,7 +4625,7 @@
|
||
|
||
/* Return nonzero if it is ok to make a tail-call to DECL. */
|
||
static bool
|
||
-arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|
||
+arm_function_ok_for_sibcall (tree decl, tree exp)
|
||
{
|
||
unsigned long func_type;
|
||
|
||
@@ -3522,6 +4658,21 @@
|
||
if (IS_INTERRUPT (func_type))
|
||
return false;
|
||
|
||
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
|
||
+ {
|
||
+ /* Check that the return value locations are the same. For
|
||
+ example that we aren't returning a value from the sibling in
|
||
+ a VFP register but then need to transfer it to a core
|
||
+ register. */
|
||
+ rtx a, b;
|
||
+
|
||
+ a = arm_function_value (TREE_TYPE (exp), decl, false);
|
||
+ b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
|
||
+ cfun->decl, false);
|
||
+ if (!rtx_equal_p (a, b))
|
||
+ return false;
|
||
+ }
|
||
+
|
||
/* Never tailcall if function may be called with a misaligned SP. */
|
||
if (IS_STACKALIGN (func_type))
|
||
return false;
|
||
@@ -4120,6 +5271,7 @@
|
||
if (GET_MODE_SIZE (mode) <= 4
|
||
&& ! (arm_arch4
|
||
&& (mode == HImode
|
||
+ || mode == HFmode
|
||
|| (mode == QImode && outer == SIGN_EXTEND))))
|
||
{
|
||
if (code == MULT)
|
||
@@ -4148,13 +5300,15 @@
|
||
load. */
|
||
if (arm_arch4)
|
||
{
|
||
- if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
|
||
+ if (mode == HImode
|
||
+ || mode == HFmode
|
||
+ || (outer == SIGN_EXTEND && mode == QImode))
|
||
range = 256;
|
||
else
|
||
range = 4096;
|
||
}
|
||
else
|
||
- range = (mode == HImode) ? 4095 : 4096;
|
||
+ range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
|
||
|
||
return (code == CONST_INT
|
||
&& INTVAL (index) < range
|
||
@@ -4325,7 +5479,8 @@
|
||
return 1;
|
||
|
||
/* This is PC relative data after arm_reorg runs. */
|
||
- else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
|
||
+ else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
|
||
+ && reload_completed
|
||
&& (GET_CODE (x) == LABEL_REF
|
||
|| (GET_CODE (x) == CONST
|
||
&& GET_CODE (XEXP (x, 0)) == PLUS
|
||
@@ -5024,7 +6179,7 @@
|
||
case UMOD:
|
||
if (TARGET_HARD_FLOAT && mode == SFmode)
|
||
*total = COSTS_N_INSNS (2);
|
||
- else if (TARGET_HARD_FLOAT && mode == DFmode)
|
||
+ else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
|
||
*total = COSTS_N_INSNS (4);
|
||
else
|
||
*total = COSTS_N_INSNS (20);
|
||
@@ -5063,23 +6218,6 @@
|
||
return true;
|
||
|
||
case MINUS:
|
||
- if (TARGET_THUMB2)
|
||
- {
|
||
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
- {
|
||
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
||
- *total = COSTS_N_INSNS (1);
|
||
- else
|
||
- *total = COSTS_N_INSNS (20);
|
||
- }
|
||
- else
|
||
- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
|
||
- /* Thumb2 does not have RSB, so all arguments must be
|
||
- registers (subtracting a constant is canonicalized as
|
||
- addition of the negated constant). */
|
||
- return false;
|
||
- }
|
||
-
|
||
if (mode == DImode)
|
||
{
|
||
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
|
||
@@ -5102,7 +6240,9 @@
|
||
|
||
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
{
|
||
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
||
+ if (TARGET_HARD_FLOAT
|
||
+ && (mode == SFmode
|
||
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
|
||
@@ -5143,6 +6283,17 @@
|
||
return true;
|
||
}
|
||
|
||
+ /* A shift as a part of RSB costs no more than RSB itself. */
|
||
+ if (GET_CODE (XEXP (x, 0)) == MULT
|
||
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
||
+ && ((INTVAL (XEXP (XEXP (x, 0), 1))
|
||
+ & (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
|
||
+ {
|
||
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
|
||
+ *total += rtx_cost (XEXP (x, 1), code, speed);
|
||
+ return true;
|
||
+ }
|
||
+
|
||
if (subcode == MULT
|
||
&& GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
|
||
&& ((INTVAL (XEXP (XEXP (x, 1), 1)) &
|
||
@@ -5164,6 +6315,19 @@
|
||
return true;
|
||
}
|
||
|
||
+ /* MLS is just as expensive as its underlying multiplication.
|
||
+ Exclude a shift by a constant, which is expressed as a
|
||
+ multiplication. */
|
||
+ if (TARGET_32BIT && arm_arch_thumb2
|
||
+ && GET_CODE (XEXP (x, 1)) == MULT
|
||
+ && ! (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
|
||
+ && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
|
||
+ (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
|
||
+ {
|
||
+ /* The cost comes from the cost of the multiply. */
|
||
+ return false;
|
||
+ }
|
||
+
|
||
/* Fall through */
|
||
|
||
case PLUS:
|
||
@@ -5192,7 +6356,9 @@
|
||
|
||
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
{
|
||
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
||
+ if (TARGET_HARD_FLOAT
|
||
+ && (mode == SFmode
|
||
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
|
||
@@ -5307,7 +6473,9 @@
|
||
case NEG:
|
||
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
{
|
||
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
||
+ if (TARGET_HARD_FLOAT
|
||
+ && (mode == SFmode
|
||
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
return false;
|
||
@@ -5460,7 +6628,9 @@
|
||
case ABS:
|
||
if (GET_MODE_CLASS (mode == MODE_FLOAT))
|
||
{
|
||
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
||
+ if (TARGET_HARD_FLOAT
|
||
+ && (mode == SFmode
|
||
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
return false;
|
||
@@ -5563,7 +6733,8 @@
|
||
return true;
|
||
|
||
case CONST_DOUBLE:
|
||
- if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
|
||
+ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
|
||
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
|
||
*total = COSTS_N_INSNS (1);
|
||
else
|
||
*total = COSTS_N_INSNS (4);
|
||
@@ -5638,7 +6809,8 @@
|
||
return false;
|
||
|
||
case MINUS:
|
||
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
return false;
|
||
@@ -5668,7 +6840,8 @@
|
||
return false;
|
||
|
||
case PLUS:
|
||
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
return false;
|
||
@@ -5698,7 +6871,8 @@
|
||
return false;
|
||
|
||
case NEG:
|
||
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
return false;
|
||
@@ -5722,7 +6896,8 @@
|
||
return false;
|
||
|
||
case ABS:
|
||
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
|
||
*total = COSTS_N_INSNS (1);
|
||
else
|
||
*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
|
||
@@ -5939,7 +7114,9 @@
|
||
|
||
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
{
|
||
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
||
+ if (TARGET_HARD_FLOAT
|
||
+ && (mode == SFmode
|
||
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
return false;
|
||
@@ -6096,7 +7273,9 @@
|
||
|
||
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
{
|
||
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
||
+ if (TARGET_HARD_FLOAT
|
||
+ && (mode == SFmode
|
||
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
||
{
|
||
*total = COSTS_N_INSNS (1);
|
||
return false;
|
||
@@ -6919,10 +8098,13 @@
|
||
}
|
||
|
||
/* Return TRUE if OP is a memory operand which we can load or store a vector
|
||
- to/from. If CORE is true, we're moving from ARM registers not Neon
|
||
- registers. */
|
||
+ to/from. TYPE is one of the following values:
|
||
+ 0 - Vector load/store (vldr)
|
||
+ 1 - Core registers (ldm)
|
||
+ 2 - Element/structure loads (vld1)
|
||
+ */
|
||
int
|
||
-neon_vector_mem_operand (rtx op, bool core)
|
||
+neon_vector_mem_operand (rtx op, int type)
|
||
{
|
||
rtx ind;
|
||
|
||
@@ -6955,23 +8137,16 @@
|
||
return arm_address_register_rtx_p (ind, 0);
|
||
|
||
/* Allow post-increment with Neon registers. */
|
||
- if (!core && GET_CODE (ind) == POST_INC)
|
||
+ if ((type != 1 && GET_CODE (ind) == POST_INC)
|
||
+ || (type == 0 && GET_CODE (ind) == PRE_DEC))
|
||
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
|
||
|
||
-#if 0
|
||
- /* FIXME: We can support this too if we use VLD1/VST1. */
|
||
- if (!core
|
||
- && GET_CODE (ind) == POST_MODIFY
|
||
- && arm_address_register_rtx_p (XEXP (ind, 0), 0)
|
||
- && GET_CODE (XEXP (ind, 1)) == PLUS
|
||
- && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
|
||
- ind = XEXP (ind, 1);
|
||
-#endif
|
||
+ /* FIXME: vld1 allows register post-modify. */
|
||
|
||
/* Match:
|
||
(plus (reg)
|
||
(const)). */
|
||
- if (!core
|
||
+ if (type == 0
|
||
&& GET_CODE (ind) == PLUS
|
||
&& GET_CODE (XEXP (ind, 0)) == REG
|
||
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
|
||
@@ -7038,10 +8213,19 @@
|
||
enum reg_class
|
||
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
|
||
{
|
||
+ if (mode == HFmode)
|
||
+ {
|
||
+ if (!TARGET_NEON_FP16)
|
||
+ return GENERAL_REGS;
|
||
+ if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
|
||
+ return NO_REGS;
|
||
+ return GENERAL_REGS;
|
||
+ }
|
||
+
|
||
if (TARGET_NEON
|
||
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
||
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||
- && neon_vector_mem_operand (x, FALSE))
|
||
+ && neon_vector_mem_operand (x, 0))
|
||
return NO_REGS;
|
||
|
||
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
|
||
@@ -7438,6 +8622,9 @@
|
||
int base_reg = -1;
|
||
int i;
|
||
|
||
+ if (low_irq_latency)
|
||
+ return 0;
|
||
+
|
||
/* Can only handle 2, 3, or 4 insns at present,
|
||
though could be easily extended if required. */
|
||
gcc_assert (nops >= 2 && nops <= 4);
|
||
@@ -7667,6 +8854,9 @@
|
||
int base_reg = -1;
|
||
int i;
|
||
|
||
+ if (low_irq_latency)
|
||
+ return 0;
|
||
+
|
||
/* Can only handle 2, 3, or 4 insns at present, though could be easily
|
||
extended if required. */
|
||
gcc_assert (nops >= 2 && nops <= 4);
|
||
@@ -7874,7 +9064,7 @@
|
||
|
||
As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
|
||
for counts of 3 or 4 regs. */
|
||
- if (arm_tune_xscale && count <= 2 && ! optimize_size)
|
||
+ if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
|
||
{
|
||
rtx seq;
|
||
|
||
@@ -7937,7 +9127,7 @@
|
||
|
||
/* See arm_gen_load_multiple for discussion of
|
||
the pros/cons of ldm/stm usage for XScale. */
|
||
- if (arm_tune_xscale && count <= 2 && ! optimize_size)
|
||
+ if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
|
||
{
|
||
rtx seq;
|
||
|
||
@@ -9555,7 +10745,10 @@
|
||
gcc_assert (GET_CODE (from) != BARRIER);
|
||
|
||
/* Count the length of this insn. */
|
||
- count += get_attr_length (from);
|
||
+ if (LABEL_P (from) && (align_jumps > 0 || align_loops > 0))
|
||
+ count += MAX (align_jumps, align_loops);
|
||
+ else
|
||
+ count += get_attr_length (from);
|
||
|
||
/* If there is a jump table, add its length. */
|
||
tmp = is_jump_table (from);
|
||
@@ -9867,6 +11060,8 @@
|
||
insn = table;
|
||
}
|
||
}
|
||
+ else if (LABEL_P (insn) && (align_jumps > 0 || align_loops > 0))
|
||
+ address += MAX (align_jumps, align_loops);
|
||
}
|
||
|
||
fix = minipool_fix_head;
|
||
@@ -10072,6 +11267,21 @@
|
||
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
|
||
{
|
||
int i;
|
||
+ int offset;
|
||
+
|
||
+ if (low_irq_latency)
|
||
+ {
|
||
+ /* Output a sequence of FLDD instructions. */
|
||
+ offset = 0;
|
||
+ for (i = reg; i < reg + count; ++i, offset += 8)
|
||
+ {
|
||
+ fputc ('\t', stream);
|
||
+ asm_fprintf (stream, "fldd\td%d, [%r,#%d]\n", i, base, offset);
|
||
+ }
|
||
+ asm_fprintf (stream, "\tadd\tsp, sp, #%d\n", count * 8);
|
||
+ return;
|
||
+ }
|
||
+
|
||
|
||
/* Workaround ARM10 VFPr1 bug. */
|
||
if (count == 2 && !arm_arch6)
|
||
@@ -10142,6 +11352,56 @@
|
||
rtx tmp, reg;
|
||
int i;
|
||
|
||
+ if (low_irq_latency)
|
||
+ {
|
||
+ int saved_size;
|
||
+ rtx sp_insn;
|
||
+
|
||
+ if (!count)
|
||
+ return 0;
|
||
+
|
||
+ saved_size = count * GET_MODE_SIZE (DFmode);
|
||
+
|
||
+ /* Since fstd does not have postdecrement addressing mode,
|
||
+ we first decrement stack pointer and then use base+offset
|
||
+ stores for VFP registers. The ARM EABI unwind information
|
||
+ can't easily describe base+offset loads, so we attach
|
||
+ a note for the effects of the whole block in the first insn,
|
||
+ and avoid marking the subsequent instructions
|
||
+ with RTX_FRAME_RELATED_P. */
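+ /* For two saved double registers starting at d8 the emitted
+ sequence is roughly:
+ sub sp, sp, #16
+ fstd d8, [sp]
+ fstd d9, [sp, #8]
+ with only the initial stack adjustment marked frame-related. */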
|
||
+ sp_insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
|
||
+ GEN_INT (-saved_size));
|
||
+ sp_insn = emit_insn (sp_insn);
|
||
+ RTX_FRAME_RELATED_P (sp_insn) = 1;
|
||
+
|
||
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
|
||
+ XVECEXP (dwarf, 0, 0) =
|
||
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
|
||
+ plus_constant (stack_pointer_rtx, -saved_size));
|
||
+
|
||
+ /* Push double VFP registers to the stack. */
|
||
+ for (i = 0; i < count; ++i )
|
||
+ {
|
||
+ rtx reg;
|
||
+ rtx mem;
|
||
+ rtx addr;
|
||
+ rtx insn;
|
||
+ reg = gen_rtx_REG (DFmode, base_reg + 2*i);
|
||
+ addr = (i == 0) ? stack_pointer_rtx
|
||
+ : gen_rtx_PLUS (SImode, stack_pointer_rtx,
|
||
+ GEN_INT (i * GET_MODE_SIZE (DFmode)));
|
||
+ mem = gen_frame_mem (DFmode, addr);
|
||
+ insn = emit_move_insn (mem, reg);
|
||
+ XVECEXP (dwarf, 0, i+1) =
|
||
+ gen_rtx_SET (VOIDmode, mem, reg);
|
||
+ }
|
||
+
|
||
+ REG_NOTES (sp_insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
||
+ REG_NOTES (sp_insn));
|
||
+
|
||
+ return saved_size;
|
||
+ }
|
||
+
|
||
/* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
|
||
register pairs are stored by a store multiple insn. We avoid this
|
||
by pushing an extra pair. */
|
||
@@ -10758,7 +12018,7 @@
|
||
}
|
||
|
||
/* Output a move, load or store for quad-word vectors in ARM registers. Only
|
||
- handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
|
||
+ handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
|
||
|
||
const char *
|
||
output_move_quad (rtx *operands)
|
||
@@ -10954,6 +12214,12 @@
|
||
ops[1] = reg;
|
||
break;
|
||
|
||
+ case PRE_DEC:
|
||
+ templ = "v%smdb%%?\t%%0!, %%h1";
|
||
+ ops[0] = XEXP (addr, 0);
|
||
+ ops[1] = reg;
|
||
+ break;
|
||
+
|
||
case POST_MODIFY:
|
||
/* FIXME: Not currently enabled in neon_vector_mem_operand. */
|
||
gcc_unreachable ();
|
||
@@ -10968,7 +12234,7 @@
|
||
{
|
||
/* We're only using DImode here because it's a convenient size. */
|
||
ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
|
||
- ops[1] = adjust_address (mem, SImode, 8 * i);
|
||
+ ops[1] = adjust_address (mem, DImode, 8 * i);
|
||
if (reg_overlap_mentioned_p (ops[0], mem))
|
||
{
|
||
gcc_assert (overlap == -1);
|
||
@@ -11557,7 +12823,7 @@
|
||
if (count > 0)
|
||
{
|
||
/* Workaround ARM10 VFPr1 bug. */
|
||
- if (count == 2 && !arm_arch6)
|
||
+ if (count == 2 && !arm_arch6 && !low_irq_latency)
|
||
count++;
|
||
saved += count * 8;
|
||
}
|
||
@@ -11886,6 +13152,41 @@
|
||
return_used_this_function = 0;
|
||
}
|
||
|
||
+/* Generate to STREAM a code sequence that pops the registers identified
+ in REGS_MASK from the stack. SP is incremented as a result. */
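+/* For example, with a REGS_MASK naming just r4 and lr this expands to
+ ldr r4, [sp], #4
+ ldr lr, [sp], #4
+ while a popped PC uses "ldm sp!, {pc}^" when returning from an
+ exception, as handled below. */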
|
||
+static void
|
||
+print_pop_reg_by_ldr (FILE *stream, int regs_mask, int rfe)
|
||
+{
|
||
+ int reg;
|
||
+
|
||
+ gcc_assert (! (regs_mask & (1 << SP_REGNUM)));
|
||
+
|
||
+ for (reg = 0; reg < PC_REGNUM; ++reg)
|
||
+ if (regs_mask & (1 << reg))
|
||
+ asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
|
||
+ reg, SP_REGNUM);
|
||
+
|
||
+ if (regs_mask & (1 << PC_REGNUM))
|
||
+ {
|
||
+ if (rfe)
|
||
+ /* When returning from exception, we need to
|
||
+ copy SPSR to CPSR. There are two ways to do
|
||
+ that: the ldm instruction with "^" suffix,
|
||
+ and movs instruction. The latter would
|
||
+ require that we load from stack to some
|
||
+ scratch register, and then move to PC.
|
||
+ Therefore, we'd need extra instruction and
|
||
+ have to make sure we actually have a spare
|
||
+ register. Using ldm with a single register
|
||
+ is simpler. */
|
||
+ asm_fprintf (stream, "\tldm\tsp!, {pc}^\n");
|
||
+ else
|
||
+ asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
|
||
+ PC_REGNUM, SP_REGNUM);
|
||
+ }
|
||
+}
|
||
+
|
||
const char *
|
||
arm_output_epilogue (rtx sibling)
|
||
{
|
||
@@ -11946,7 +13247,7 @@
|
||
/* This variable is for the Virtual Frame Pointer, not VFP regs. */
|
||
int vfp_offset = offsets->frame;
|
||
|
||
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
||
+ if (TARGET_FPA_EMU2)
|
||
{
|
||
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
|
||
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
||
@@ -12169,7 +13470,7 @@
|
||
SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
|
||
}
|
||
|
||
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
||
+ if (TARGET_FPA_EMU2)
|
||
{
|
||
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
|
||
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
||
@@ -12253,22 +13554,19 @@
|
||
to load use the LDR instruction - it is faster. For Thumb-2
|
||
always use pop and the assembler will pick the best instruction.*/
|
||
if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
|
||
- && !IS_INTERRUPT(func_type))
|
||
+ && !IS_INTERRUPT (func_type))
|
||
{
|
||
asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
|
||
}
|
||
else if (saved_regs_mask)
|
||
{
|
||
- if (saved_regs_mask & (1 << SP_REGNUM))
|
||
- /* Note - write back to the stack register is not enabled
|
||
- (i.e. "ldmfd sp!..."). We know that the stack pointer is
|
||
- in the list of registers and if we add writeback the
|
||
- instruction becomes UNPREDICTABLE. */
|
||
- print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
|
||
- rfe);
|
||
- else if (TARGET_ARM)
|
||
- print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
|
||
- rfe);
|
||
+ gcc_assert ( ! (saved_regs_mask & (1 << SP_REGNUM)));
|
||
+ if (TARGET_ARM)
|
||
+ if (low_irq_latency)
|
||
+ print_pop_reg_by_ldr (f, saved_regs_mask, rfe);
|
||
+ else
|
||
+ print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
|
||
+ rfe);
|
||
else
|
||
print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
|
||
}
|
||
@@ -12389,6 +13687,32 @@
|
||
|
||
gcc_assert (num_regs && num_regs <= 16);
|
||
|
||
+ if (low_irq_latency)
|
||
+ {
|
||
+ rtx insn = 0;
|
||
+
|
||
+ /* Emit a series of individual str instructions rather than a single stm. */
|
||
+ /* TODO: Use ldrd where possible. */
|
||
+ gcc_assert (! (mask & (1 << SP_REGNUM)));
|
||
+
|
||
+ for (i = LAST_ARM_REGNUM; i >= 0; --i)
|
||
+ {
|
||
+ if (mask & (1 << i))
+ {
|
||
+ rtx reg, where, mem;
|
||
+
|
||
+ reg = gen_rtx_REG (SImode, i);
|
||
+ where = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
|
||
+ mem = gen_rtx_MEM (SImode, where);
|
||
+ insn = emit_move_insn (mem, reg);
|
||
+ RTX_FRAME_RELATED_P (insn) = 1;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return insn;
|
||
+ }
|
||
+
|
||
/* We don't record the PC in the dwarf frame information. */
|
||
num_dwarf_regs = num_regs;
|
||
if (mask & (1 << PC_REGNUM))
|
||
@@ -12737,22 +14061,23 @@
|
||
{
|
||
int reg = -1;
|
||
|
||
- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
|
||
- {
|
||
- if ((offsets->saved_regs_mask & (1 << i)) == 0)
|
||
- {
|
||
- reg = i;
|
||
- break;
|
||
- }
|
||
- }
|
||
-
|
||
- if (reg == -1 && arm_size_return_regs () <= 12
|
||
- && !crtl->tail_call_emit)
|
||
+ /* If it is safe to use r3, then do so. This sometimes
|
||
+ generates better code on Thumb-2 by avoiding the need to
|
||
+ use 32-bit push/pop instructions. */
|
||
+ if (!crtl->tail_call_emit
|
||
+ && arm_size_return_regs () <= 12)
|
||
{
|
||
- /* Push/pop an argument register (r3) if all callee saved
|
||
- registers are already being pushed. */
|
||
reg = 3;
|
||
}
|
||
+ else
|
||
+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
|
||
+ {
|
||
+ if ((offsets->saved_regs_mask & (1 << i)) == 0)
|
||
+ {
|
||
+ reg = i;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
|
||
if (reg != -1)
|
||
{
|
||
@@ -12876,7 +14201,7 @@
|
||
|
||
/* Save any floating point call-saved registers used by this
|
||
function. */
|
||
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
||
+ if (TARGET_FPA_EMU2)
|
||
{
|
||
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
|
||
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
||
@@ -13483,7 +14808,11 @@
|
||
{
|
||
fprintf (stream, ", %s ", shift);
|
||
if (val == -1)
|
||
- arm_print_operand (stream, XEXP (x, 1), 0);
|
||
+ {
|
||
+ arm_print_operand (stream, XEXP (x, 1), 0);
|
||
+ if (janus2_code)
|
||
+ fprintf(stream, "\n\tnop");
|
||
+ }
|
||
else
|
||
fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
|
||
}
|
||
@@ -13704,6 +15033,30 @@
|
||
}
|
||
return;
|
||
|
||
+ /* Print the high single-precision register of a VFP double-precision
|
||
+ register. */
|
||
+ case 'p':
|
||
+ {
|
||
+ int mode = GET_MODE (x);
|
||
+ int regno;
|
||
+
|
||
+ if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
|
||
+ {
|
||
+ output_operand_lossage ("invalid operand for code '%c'", code);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ regno = REGNO (x);
|
||
+ if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
|
||
+ {
|
||
+ output_operand_lossage ("invalid operand for code '%c'", code);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
|
||
+ }
|
||
+ return;
|
||
+
|
||
/* Print a VFP/Neon double precision or quad precision register name. */
|
||
case 'P':
|
||
case 'q':
|
||
@@ -13821,6 +15174,57 @@
|
||
}
|
||
return;
|
||
|
||
+ /* Memory operand for vld1/vst1 instruction. */
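+ /* For instance, a post-incremented base in r0 with 64-bit alignment
+ prints as "[r0, :64]!"; alignment hints below 8 bytes are omitted. */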
|
||
+ case 'A':
|
||
+ {
|
||
+ rtx addr;
|
||
+ bool postinc = FALSE;
|
||
+ unsigned align;
|
||
+
|
||
+ gcc_assert (GET_CODE (x) == MEM);
|
||
+ addr = XEXP (x, 0);
|
||
+ if (GET_CODE (addr) == POST_INC)
|
||
+ {
|
||
+ postinc = 1;
|
||
+ addr = XEXP (addr, 0);
|
||
+ }
|
||
+ align = MEM_ALIGN (x) >> 3;
|
||
+ asm_fprintf (stream, "[%r", REGNO (addr));
|
||
+ if (align > GET_MODE_SIZE (GET_MODE (x)))
|
||
+ align = GET_MODE_SIZE (GET_MODE (x));
|
||
+ if (align >= 8)
|
||
+ asm_fprintf (stream, ", :%d", align << 3);
|
||
+ asm_fprintf (stream, "]");
|
||
+ if (postinc)
|
||
+ fputs("!", stream);
|
||
+ }
|
||
+ return;
|
||
+
|
||
+ /* Register specifier for vld1.16/vst1.16. Translate the S register
|
||
+ number into a D register number and element index. */
|
||
+ case 'z':
|
||
+ {
|
||
+ int mode = GET_MODE (x);
|
||
+ int regno;
|
||
+
|
||
+ if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
|
||
+ {
|
||
+ output_operand_lossage ("invalid operand for code '%c'", code);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ regno = REGNO (x);
|
||
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
|
||
+ {
|
||
+ output_operand_lossage ("invalid operand for code '%c'", code);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ regno = regno - FIRST_VFP_REGNUM;
|
||
+ fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
|
||
+ }
|
||
+ return;
|
||
+
|
||
default:
|
||
if (x == 0)
|
||
{
|
||
@@ -13854,6 +15258,12 @@
|
||
default:
|
||
gcc_assert (GET_CODE (x) != NEG);
|
||
fputc ('#', stream);
|
||
+ if (GET_CODE (x) == HIGH)
|
||
+ {
|
||
+ fputs (":lower16:", stream);
|
||
+ x = XEXP (x, 0);
|
||
+ }
|
||
+
|
||
output_addr_const (stream, x);
|
||
break;
|
||
}
|
||
@@ -14245,6 +15655,10 @@
|
||
first insn after the following code_label if REVERSE is true. */
|
||
rtx start_insn = insn;
|
||
|
||
+ /* Don't do this if we're not considering conditional execution. */
|
||
+ if (TARGET_NO_SINGLE_COND_EXEC)
|
||
+ return;
|
||
+
|
||
/* If in state 4, check if the target branch is reached, in order to
|
||
change back to state 0. */
|
||
if (arm_ccfsm_state == 4)
|
||
@@ -14618,6 +16032,11 @@
|
||
if (mode == DFmode)
|
||
return VFP_REGNO_OK_FOR_DOUBLE (regno);
|
||
|
||
+ /* VFP registers can hold HFmode values, but there is no point in
|
||
+ putting them there unless we have hardware conversion insns. */
|
||
+ if (mode == HFmode)
|
||
+ return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
|
||
+
|
||
if (TARGET_NEON)
|
||
return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
|
||
|| (VALID_NEON_QREG_MODE (mode)
|
||
@@ -14637,16 +16056,16 @@
|
||
return mode == SImode;
|
||
|
||
if (IS_IWMMXT_REGNUM (regno))
|
||
- return VALID_IWMMXT_REG_MODE (mode);
|
||
+ return VALID_IWMMXT_REG_MODE (mode) && mode != SImode;
|
||
}
|
||
|
||
- /* We allow any value to be stored in the general registers.
|
||
+ /* We allow almost any value to be stored in the general registers.
|
||
Restrict doubleword quantities to even register pairs so that we can
|
||
- use ldrd. Do not allow Neon structure opaque modes in general registers;
|
||
- they would use too many. */
|
||
+ use ldrd. Do not allow very large Neon structure opaque modes in
|
||
+ general registers; they would use too many. */
|
||
if (regno <= LAST_ARM_REGNUM)
|
||
return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
|
||
- && !VALID_NEON_STRUCT_MODE (mode);
|
||
+ && ARM_NUM_REGS (mode) <= 4;
|
||
|
||
if (regno == FRAME_POINTER_REGNUM
|
||
|| regno == ARG_POINTER_REGNUM)
|
||
@@ -16103,6 +17522,15 @@
|
||
}
|
||
|
||
static void
|
||
+arm_init_fp16_builtins (void)
|
||
+{
|
||
+ tree fp16_type = make_node (REAL_TYPE);
|
||
+ TYPE_PRECISION (fp16_type) = 16;
|
||
+ layout_type (fp16_type);
|
||
+ (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
|
||
+}
|
||
+
|
||
+static void
|
||
arm_init_builtins (void)
|
||
{
|
||
arm_init_tls_builtins ();
|
||
@@ -16112,6 +17540,71 @@
|
||
|
||
if (TARGET_NEON)
|
||
arm_init_neon_builtins ();
|
||
+
|
||
+ if (arm_fp16_format)
|
||
+ arm_init_fp16_builtins ();
|
||
+}
|
||
+
|
||
+/* Implement TARGET_INVALID_PARAMETER_TYPE. */
|
||
+
|
||
+static const char *
|
||
+arm_invalid_parameter_type (const_tree t)
|
||
+{
|
||
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
||
+ return N_("function parameters cannot have __fp16 type");
|
||
+ return NULL;
|
||
+}
|
||
+
|
||
+/* Implement TARGET_INVALID_PARAMETER_TYPE. */
|
||
+
|
||
+static const char *
|
||
+arm_invalid_return_type (const_tree t)
|
||
+{
|
||
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
||
+ return N_("functions cannot return __fp16 type");
|
||
+ return NULL;
|
||
+}
|
||
+
|
||
+/* Implement TARGET_PROMOTED_TYPE. */
|
||
+
|
||
+static tree
|
||
+arm_promoted_type (const_tree t)
|
||
+{
|
||
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
||
+ return float_type_node;
|
||
+ return NULL_TREE;
|
||
+}
|
||
+
|
||
+/* Implement TARGET_CONVERT_TO_TYPE.
|
||
+ Specifically, this hook implements the peculiarity of the ARM
|
||
+ half-precision floating-point C semantics that requires conversions between
|
||
+ __fp16 to or from double to do an intermediate conversion to float. */
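+/* In other words, for an __fp16 value X, (double) X is evaluated as
+ (double) (float) X, and a conversion from double to __fp16 likewise
+ goes through float first. */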
|
||
+
|
||
+static tree
|
||
+arm_convert_to_type (tree type, tree expr)
|
||
+{
|
||
+ tree fromtype = TREE_TYPE (expr);
|
||
+ if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
|
||
+ return NULL_TREE;
|
||
+ if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
|
||
+ || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
|
||
+ return convert (type, convert (float_type_node, expr));
|
||
+ return NULL_TREE;
|
||
+}
|
||
+
|
||
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
|
||
+ This simply adds HFmode as a supported mode; even though we don't
|
||
+ implement arithmetic on this type directly, it's supported by
|
||
+ optabs conversions, much the way the double-word arithmetic is
|
||
+ special-cased in the default hook. */
|
||
+
|
||
+static bool
|
||
+arm_scalar_mode_supported_p (enum machine_mode mode)
|
||
+{
|
||
+ if (mode == HFmode)
|
||
+ return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
|
||
+ else
|
||
+ return default_scalar_mode_supported_p (mode);
|
||
}
|
||
|
||
/* Errors in the source file can cause expand_expr to return const0_rtx
|
||
@@ -17191,6 +18684,7 @@
|
||
unsigned HOST_WIDE_INT mask = 0xff;
|
||
int i;
|
||
|
||
+ val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
|
||
if (val == 0) /* XXX */
|
||
return 0;
|
||
|
||
@@ -18279,40 +19773,8 @@
|
||
else
|
||
{
|
||
int set_float_abi_attributes = 0;
|
||
- switch (arm_fpu_arch)
|
||
- {
|
||
- case FPUTYPE_FPA:
|
||
- fpu_name = "fpa";
|
||
- break;
|
||
- case FPUTYPE_FPA_EMU2:
|
||
- fpu_name = "fpe2";
|
||
- break;
|
||
- case FPUTYPE_FPA_EMU3:
|
||
- fpu_name = "fpe3";
|
||
- break;
|
||
- case FPUTYPE_MAVERICK:
|
||
- fpu_name = "maverick";
|
||
- break;
|
||
- case FPUTYPE_VFP:
|
||
- fpu_name = "vfp";
|
||
- set_float_abi_attributes = 1;
|
||
- break;
|
||
- case FPUTYPE_VFP3D16:
|
||
- fpu_name = "vfpv3-d16";
|
||
- set_float_abi_attributes = 1;
|
||
- break;
|
||
- case FPUTYPE_VFP3:
|
||
- fpu_name = "vfpv3";
|
||
- set_float_abi_attributes = 1;
|
||
- break;
|
||
- case FPUTYPE_NEON:
|
||
- fpu_name = "neon";
|
||
- set_float_abi_attributes = 1;
|
||
- break;
|
||
- default:
|
||
- abort();
|
||
- }
|
||
- if (set_float_abi_attributes)
|
||
+ fpu_name = arm_fpu_desc->name;
|
||
+ if (arm_fp_model == ARM_FP_MODEL_VFP)
|
||
{
|
||
if (TARGET_HARD_FLOAT)
|
||
asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
|
||
@@ -18362,6 +19824,11 @@
|
||
val = 6;
|
||
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
|
||
|
||
+ /* Tag_ABI_FP_16bit_format. */
|
||
+ if (arm_fp16_format)
|
||
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
|
||
+ (int)arm_fp16_format);
|
||
+
|
||
if (arm_lang_output_object_attributes_hook)
|
||
arm_lang_output_object_attributes_hook();
|
||
}
|
||
@@ -18591,6 +20058,23 @@
|
||
return 1;
|
||
}
|
||
|
||
+/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
|
||
+ HFmode constant pool entries are actually loaded with ldr. */
|
||
+void
|
||
+arm_emit_fp16_const (rtx c)
|
||
+{
|
||
+ REAL_VALUE_TYPE r;
|
||
+ long bits;
|
||
+
|
||
+ REAL_VALUE_FROM_CONST_DOUBLE (r, c);
|
||
+ bits = real_to_target (NULL, &r, HFmode);
|
||
+ if (WORDS_BIG_ENDIAN)
|
||
+ assemble_zeros (2);
|
||
+ assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
|
||
+ if (!WORDS_BIG_ENDIAN)
|
||
+ assemble_zeros (2);
|
||
+}
|
||
+
|
||
const char *
|
||
arm_output_load_gr (rtx *operands)
|
||
{
|
||
@@ -18628,19 +20112,24 @@
|
||
that way. */
|
||
|
||
static void
|
||
-arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
|
||
+arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
|
||
enum machine_mode mode,
|
||
tree type,
|
||
int *pretend_size,
|
||
int second_time ATTRIBUTE_UNUSED)
|
||
{
|
||
- int nregs = cum->nregs;
|
||
- if (nregs & 1
|
||
- && ARM_DOUBLEWORD_ALIGN
|
||
- && arm_needs_doubleword_align (mode, type))
|
||
- nregs++;
|
||
-
|
||
+ int nregs;
|
||
+
|
||
cfun->machine->uses_anonymous_args = 1;
|
||
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
||
+ {
|
||
+ nregs = pcum->aapcs_ncrn;
|
||
+ if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
|
||
+ nregs++;
|
||
+ }
|
||
+ else
|
||
+ nregs = pcum->nregs;
|
||
+
|
||
if (nregs < NUM_ARG_REGS)
|
||
*pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
|
||
}
|
||
@@ -19024,9 +20513,10 @@
|
||
|| mode == V16QImode || mode == V4SFmode || mode == V2DImode))
|
||
return true;
|
||
|
||
- if ((mode == V2SImode)
|
||
- || (mode == V4HImode)
|
||
- || (mode == V8QImode))
|
||
+ if ((TARGET_NEON || TARGET_IWMMXT)
|
||
+ && ((mode == V2SImode)
|
||
+ || (mode == V4HImode)
|
||
+ || (mode == V8QImode)))
|
||
return true;
|
||
|
||
return false;
|
||
@@ -19057,9 +20547,14 @@
|
||
if (IS_FPA_REGNUM (regno))
|
||
return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
|
||
|
||
- /* FIXME: VFPv3 register numbering. */
|
||
if (IS_VFP_REGNUM (regno))
|
||
- return 64 + regno - FIRST_VFP_REGNUM;
|
||
+ {
|
||
+ /* See comment in arm_dwarf_register_span. */
|
||
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
|
||
+ return 64 + regno - FIRST_VFP_REGNUM;
|
||
+ else
|
||
+ return 256 + (regno - FIRST_VFP_REGNUM) / 2;
|
||
+ }
|
||
|
||
if (IS_IWMMXT_GR_REGNUM (regno))
|
||
return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
|
||
@@ -19070,6 +20565,39 @@
|
||
gcc_unreachable ();
|
||
}
|
||
|
||
+/* Dwarf models VFPv3 registers as 32 64-bit registers.
|
||
+ GCC models them as 64 32-bit registers, so we need to describe this to
|
||
+ the DWARF generation code. Other registers can use the default. */
|
||
+static rtx
|
||
+arm_dwarf_register_span (rtx rtl)
|
||
+{
|
||
+ unsigned regno;
|
||
+ int nregs;
|
||
+ int i;
|
||
+ rtx p;
|
||
+
|
||
+ regno = REGNO (rtl);
|
||
+ if (!IS_VFP_REGNUM (regno))
|
||
+ return NULL_RTX;
|
||
+
|
||
+ /* The EABI defines two VFP register ranges:
|
||
+ 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
|
||
+ 256-287: D0-D31
|
||
+ The recommended encodings for s0-s31 is a DW_OP_bit_piece of the
|
||
+ corresponding D register. However gdb6.6 does not support this, so
|
||
+ we use the legacy encodings. We also use these encodings for D0-D15
|
||
+ for compatibility with older debuggers. */
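+/* For example, D16 (which has no single-precision alias) is described
+ as the single DWARF register 272, while D0-D15 keep the legacy 64-95
+ numbering produced by the debug register-number hook above. */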
|
||
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
|
||
+ return NULL_RTX;
|
||
+
|
||
+ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
|
||
+ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs));
|
||
+ regno = (regno - FIRST_VFP_REGNUM) / 2;
|
||
+ for (i = 0; i < nregs; i++)
|
||
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
|
||
+
|
||
+ return p;
|
||
+}
|
||
|
||
#ifdef TARGET_UNWIND_INFO
|
||
/* Emit unwind directives for a store-multiple instruction or stack pointer
|
||
@@ -19556,6 +21084,7 @@
|
||
case cortexr4f:
|
||
case cortexa8:
|
||
case cortexa9:
|
||
+ case marvell_f:
|
||
return 2;
|
||
|
||
default:
|
||
@@ -19620,6 +21149,10 @@
|
||
return "St9__va_list";
|
||
}
|
||
|
||
+ /* Half-precision float. */
|
||
+ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
|
||
+ return "Dh";
|
||
+
|
||
if (TREE_CODE (type) != VECTOR_TYPE)
|
||
return NULL;
|
||
|
||
@@ -19676,6 +21209,87 @@
|
||
given on the command line. */
|
||
if (level > 0)
|
||
flag_section_anchors = 2;
|
||
+
|
||
+ if (size)
|
||
+ {
|
||
+ /* Select optimizations that are a win for code size.
|
||
+
|
||
+ The inlining options set below have two important
|
||
+ consequences for functions not explicitly marked
|
||
+ inline:
|
||
+ - Static functions used once are inlined if
|
||
+ sufficiently small. Static functions used twice
|
||
+ are not inlined.
|
||
+ - Non-static functions are never inlined.
|
||
+ So in effect, inlining will never cause two copies
|
||
+ of function bodies to be created. */
|
||
+ /* Empirical results show that these options benefit code
|
||
+ size on arm. */
|
||
+ /* FIXME: -fsee seems to be broken for Thumb-2. */
|
||
+ /* flag_see = 1; */
|
||
+ flag_move_loop_invariants = 0;
|
||
+ /* In Thumb mode the function call code size overhead is typically very
|
||
+ small, and narrow branch instructions have very limited range.
|
||
+ Inlining even medium sized functions tends to bloat the caller and
|
||
+ require the use of long branch instructions. On average the long
|
||
+ branches cost more than eliminating the function call overhead saves,
|
||
+ so we use extremely restrictive automatic inlining heuristics. In ARM
|
||
+ mode the results are fairly neutral, probably due to better constant
|
||
+ pool placement. */
|
||
+ set_param_value ("max-inline-insns-single", 1);
|
||
+ set_param_value ("max-inline-insns-auto", 1);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* CSL LOCAL */
|
||
+ /* Set flag_unroll_loops to a default value, so that we can tell
|
||
+ if it was specified on the command line; see
|
||
+ arm_override_options. */
|
||
+ flag_unroll_loops = 2;
|
||
+ /* Promote loop indices to int where possible. Consider moving this
|
||
+ to -Os, also. */
|
||
+ flag_promote_loop_indices = 1;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Return how many instructions to look ahead for better insn
|
||
+ scheduling. */
|
||
+static int
|
||
+arm_multipass_dfa_lookahead (void)
|
||
+{
|
||
+ return (arm_tune == marvell_f) ? 4 : 0;
|
||
+}
|
||
+
|
||
+/* Return the minimum alignment required to load or store a
|
||
+ vector of the given type, which may be less than the
|
||
+ natural alignment of the type. */
|
||
+
|
||
+static int
|
||
+arm_vector_min_alignment (const_tree type)
|
||
+{
|
||
+ if (TARGET_NEON)
|
||
+ {
|
||
+ /* The NEON element load and store instructions only require the
|
||
+ alignment of the element type. They can benefit from higher
|
||
+ statically reported alignment, but we do not take advantage
|
||
+ of that yet. */
|
||
+ gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
|
||
+ return TYPE_ALIGN_UNIT (TREE_TYPE (type));
|
||
+ }
|
||
+
|
||
+ return default_vector_min_alignment (type);
|
||
+}
|
||
+
|
||
+static bool
|
||
+arm_vector_always_misalign (const_tree type ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ /* On big-endian targets array loads (vld1) and vector loads (vldm)
|
||
+ use a different format. Always use the "misaligned" array variant.
|
||
+ FIXME: this still doesn't work for big-endian because of constant
|
||
+ loads and other operations using vldm ordering. See
|
||
+ issue 6722. */
|
||
+ return TARGET_NEON && !BYTES_BIG_ENDIAN;
|
||
}
|
||
|
||
#include "gt-arm.h"
|
||
+
|
||
--- a/gcc/config/arm/arm-cores.def
|
||
+++ b/gcc/config/arm/arm-cores.def
|
||
@@ -104,6 +104,7 @@
|
||
ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale)
|
||
ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
|
||
ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
|
||
+ARM_CORE("marvell-f", marvell_f, 5TE, FL_LDSCHED | FL_VFPV2 | FL_MARVELL_F, 9e)
|
||
|
||
/* V5TEJ Architecture Processors */
|
||
ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)
|
||
@@ -117,9 +118,13 @@
|
||
ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e)
|
||
ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
|
||
ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e)
|
||
+
|
||
+/* V7 Architecture Processors */
|
||
+ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e)
|
||
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)
|
||
ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e)
|
||
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
|
||
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e)
|
||
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e)
|
||
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e)
|
||
+ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e)
|
||
--- a/gcc/config/arm/arm.h
|
||
+++ b/gcc/config/arm/arm.h
|
||
@@ -85,6 +85,10 @@
|
||
builtin_define ("__IWMMXT__"); \
|
||
if (TARGET_AAPCS_BASED) \
|
||
builtin_define ("__ARM_EABI__"); \
|
||
+ if (arm_tune_marvell_f) \
|
||
+ builtin_define ("__ARM_TUNE_MARVELL_F__"); \
|
||
+ if (low_irq_latency) \
|
||
+ builtin_define ("__low_irq_latency__"); \
|
||
} while (0)
|
||
|
||
/* The various ARM cores. */
|
||
@@ -199,6 +203,13 @@
|
||
#define TARGET_AAPCS_BASED \
|
||
(arm_abi != ARM_ABI_APCS && arm_abi != ARM_ABI_ATPCS)
|
||
|
||
+/* True if we should avoid generating conditional execution instructions. */
|
||
+#define TARGET_NO_COND_EXEC (arm_tune_marvell_f && !optimize_size)
|
||
+/* Avoid most conditional instructions, but allow pairs with opposite
|
||
+ conditions and the same destination. */
|
||
+#define TARGET_NO_SINGLE_COND_EXEC \
|
||
+ ((arm_tune_cortex_a9 || arm_tune_marvell_f) && !optimize_size)
|
||
+
|
||
#define TARGET_HARD_TP (target_thread_pointer == TP_CP15)
|
||
#define TARGET_SOFT_TP (target_thread_pointer == TP_SOFT)
|
||
|
||
@@ -211,35 +222,43 @@
|
||
/* Thumb-1 only. */
|
||
#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm)
|
||
|
||
+#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2)
|
||
/* The following two macros concern the ability to execute coprocessor
|
||
instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently
|
||
only ever tested when we know we are generating for VFP hardware; we need
|
||
to be more careful with TARGET_NEON as noted below. */
|
||
|
||
/* FPU is has the full VFPv3/NEON register file of 32 D registers. */
|
||
-#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \
|
||
- && (arm_fpu_arch == FPUTYPE_VFP3 \
|
||
- || arm_fpu_arch == FPUTYPE_NEON))
|
||
+#define TARGET_VFPD32 (TARGET_VFP && arm_arch_vfp_regs == VFP_REG_D32)
|
||
|
||
/* FPU supports VFPv3 instructions. */
|
||
-#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \
|
||
- && (arm_fpu_arch == FPUTYPE_VFP3D16 \
|
||
- || TARGET_VFPD32))
|
||
+#define TARGET_VFP3 (TARGET_VFP && arm_arch_vfp_rev >= 3)
|
||
+
|
||
+/* FPU only supports VFP single-precision instructions. */
|
||
+#define TARGET_VFP_SINGLE (TARGET_VFP && arm_arch_vfp_regs == VFP_REG_SINGLE)
|
||
+
|
||
+/* FPU supports VFP double-precision instructions. */
|
||
+#define TARGET_VFP_DOUBLE (TARGET_VFP && arm_arch_vfp_regs != VFP_REG_SINGLE)
|
||
+
|
||
+/* FPU supports half-precision floating-point with NEON element load/store. */
|
||
+#define TARGET_NEON_FP16 (TARGET_VFP && arm_arch_vfp_neon && arm_arch_vfp_fp16)
|
||
+
|
||
+/* FPU supports VFP half-precision floating-point. */
|
||
+#define TARGET_FP16 (TARGET_VFP && arm_arch_vfp_fp16)
|
||
|
||
/* FPU supports Neon instructions. The setting of this macro gets
|
||
revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT
|
||
and TARGET_HARD_FLOAT to ensure that NEON instructions are
|
||
available. */
|
||
#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \
|
||
- && arm_fp_model == ARM_FP_MODEL_VFP \
|
||
- && arm_fpu_arch == FPUTYPE_NEON)
|
||
+ && TARGET_VFP && arm_arch_vfp_neon)
|
||
|
||
/* "DSP" multiply instructions, eg. SMULxy. */
|
||
#define TARGET_DSP_MULTIPLY \
|
||
- (TARGET_32BIT && arm_arch5e && arm_arch_notm)
|
||
+ (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7em))
|
||
/* Integer SIMD instructions, and extend-accumulate instructions. */
|
||
#define TARGET_INT_SIMD \
|
||
- (TARGET_32BIT && arm_arch6 && arm_arch_notm)
|
||
+ (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em))
|
||
|
||
/* Should MOVW/MOVT be used in preference to a constant pool. */
|
||
#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size)
|
||
@@ -289,40 +308,30 @@
|
||
ARM_FP_MODEL_VFP
|
||
};
|
||
|
||
-extern enum arm_fp_model arm_fp_model;
|
||
-
|
||
-/* Which floating point hardware is available. Also update
|
||
- fp_model_for_fpu in arm.c when adding entries to this list. */
|
||
-enum fputype
|
||
-{
|
||
- /* No FP hardware. */
|
||
- FPUTYPE_NONE,
|
||
- /* Full FPA support. */
|
||
- FPUTYPE_FPA,
|
||
- /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */
|
||
- FPUTYPE_FPA_EMU2,
|
||
- /* Emulated FPA hardware, Issue 3 emulator. */
|
||
- FPUTYPE_FPA_EMU3,
|
||
- /* Cirrus Maverick floating point co-processor. */
|
||
- FPUTYPE_MAVERICK,
|
||
- /* VFP. */
|
||
- FPUTYPE_VFP,
|
||
- /* VFPv3-D16. */
|
||
- FPUTYPE_VFP3D16,
|
||
- /* VFPv3. */
|
||
- FPUTYPE_VFP3,
|
||
- /* Neon. */
|
||
- FPUTYPE_NEON
|
||
+enum vfp_reg_type {
|
||
+ VFP_REG_D16,
|
||
+ VFP_REG_D32,
|
||
+ VFP_REG_SINGLE
|
||
};
|
||
|
||
-/* Recast the floating point class to be the floating point attribute. */
|
||
-#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune)
|
||
-
|
||
-/* What type of floating point to tune for */
|
||
-extern enum fputype arm_fpu_tune;
|
||
+extern const struct arm_fpu_desc
|
||
+{
|
||
+ const char *name;
|
||
+ enum arm_fp_model model;
|
||
+ int rev;
|
||
+ enum vfp_reg_type myregs;
|
||
+ int neon;
|
||
+ int fp16;
|
||
+} *arm_fpu_desc;
|
||
+
|
||
+#define arm_fp_model arm_fpu_desc->model
|
||
+#define arm_arch_vfp_rev arm_fpu_desc->rev
|
||
+#define arm_arch_vfp_regs arm_fpu_desc->myregs
|
||
+#define arm_arch_vfp_neon arm_fpu_desc->neon
|
||
+#define arm_arch_vfp_fp16 arm_fpu_desc->fp16
|
||
|
||
-/* What type of floating point instructions are available */
|
||
-extern enum fputype arm_fpu_arch;
|
||
+/* Which floating point hardware to schedule for. */
|
||
+extern int arm_fpu_attr;
|
||
|
||
enum float_abi_type
|
||
{
|
||
@@ -337,6 +346,21 @@
|
||
#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
|
||
#endif
|
||
|
||
+/* Which __fp16 format to use.
|
||
+ The enumeration values correspond to the numbering for the
|
||
+ Tag_ABI_FP_16bit_format attribute.
|
||
+ */
|
||
+enum arm_fp16_format_type
|
||
+{
|
||
+ ARM_FP16_FORMAT_NONE = 0,
|
||
+ ARM_FP16_FORMAT_IEEE = 1,
|
||
+ ARM_FP16_FORMAT_ALTERNATIVE = 2
|
||
+};
|
||
+
|
||
+extern enum arm_fp16_format_type arm_fp16_format;
|
||
+#define LARGEST_EXPONENT_IS_NORMAL(bits) \
|
||
+ ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
|
||
+
|
||
/* Which ABI to use. */
|
||
enum arm_abi_type
|
||
{
|
||
@@ -383,12 +407,18 @@
|
||
/* Nonzero if instructions not present in the 'M' profile can be used. */
|
||
extern int arm_arch_notm;
|
||
|
||
+/* Nonzero if instructions present in ARMv7E-M can be used. */
|
||
+extern int arm_arch7em;
|
||
+
|
||
/* Nonzero if this chip can benefit from load scheduling. */
|
||
extern int arm_ld_sched;
|
||
|
||
/* Nonzero if generating thumb code. */
|
||
extern int thumb_code;
|
||
|
||
+/* Nonzero if generating Janus2 code. */
|
||
+extern int janus2_code;
|
||
+
|
||
/* Nonzero if this chip is a StrongARM. */
|
||
extern int arm_tune_strongarm;
|
||
|
||
@@ -404,6 +434,9 @@
|
||
/* Nonzero if tuning for XScale. */
|
||
extern int arm_tune_xscale;
|
||
|
||
+/* Nonzero if tuning for Marvell Feroceon. */
|
||
+extern int arm_tune_marvell_f;
|
||
+
|
||
/* Nonzero if tuning for stores via the write buffer. */
|
||
extern int arm_tune_wbuf;
|
||
|
||
@@ -423,6 +456,10 @@
|
||
/* Nonzero if chip supports integer division instruction. */
|
||
extern int arm_arch_hwdiv;
|
||
|
||
+/* Nonzero if we should minimize interrupt latency of the
|
||
+ generated code. */
|
||
+extern int low_irq_latency;
|
||
+
|
||
#ifndef TARGET_DEFAULT
|
||
#define TARGET_DEFAULT (MASK_APCS_FRAME)
|
||
#endif
|
||
@@ -757,12 +794,11 @@
|
||
fixed_regs[regno] = call_used_regs[regno] = 1; \
|
||
} \
|
||
\
|
||
- if (TARGET_THUMB && optimize_size) \
|
||
- { \
|
||
- /* When optimizing for size, it's better not to use \
|
||
- the HI regs, because of the overhead of stacking \
|
||
- them. */ \
|
||
- /* ??? Is this still true for thumb2? */ \
|
||
+ if (TARGET_THUMB1 && optimize_size) \
|
||
+ { \
|
||
+ /* When optimizing for size on Thumb-1, it's better not \
|
||
+ to use the HI regs, because of the overhead of \
|
||
+ stacking them. */ \
|
||
for (regno = FIRST_HI_REGNUM; \
|
||
regno <= LAST_HI_REGNUM; ++regno) \
|
||
fixed_regs[regno] = call_used_regs[regno] = 1; \
|
||
@@ -881,6 +917,9 @@
|
||
/* The number of (integer) argument register available. */
|
||
#define NUM_ARG_REGS 4
|
||
|
||
+/* And similarly for the VFP. */
|
||
+#define NUM_VFP_ARG_REGS 16
|
||
+
|
||
/* Return the register number of the N'th (integer) argument. */
|
||
#define ARG_REGISTER(N) (N - 1)
|
||
|
||
@@ -1059,7 +1098,7 @@
|
||
(GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
|
||
|
||
#define VALID_IWMMXT_REG_MODE(MODE) \
|
||
- (arm_vector_mode_supported_p (MODE) || (MODE) == DImode)
|
||
+ (arm_vector_mode_supported_p (MODE) || (MODE) == DImode || (MODE) == SImode)
|
||
|
||
/* Modes valid for Neon D registers. */
|
||
#define VALID_NEON_DREG_MODE(MODE) \
|
||
@@ -1230,11 +1269,14 @@
|
||
|| reg_classes_intersect_p (VFP_REGS, (CLASS)) \
|
||
: 0)
|
||
|
||
-/* We need to define this for LO_REGS on thumb. Otherwise we can end up
|
||
- using r0-r4 for function arguments, r7 for the stack frame and don't
|
||
- have enough left over to do doubleword arithmetic. */
|
||
+/* We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
|
||
+ using r0-r4 for function arguments, r7 for the stack frame and don't have
|
||
+ enough left over to do doubleword arithmetic. For Thumb-2 all the
|
||
+ potentially problematic instructions accept high registers so this is not
|
||
+ necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
|
||
+ that require many low registers. */
|
||
#define CLASS_LIKELY_SPILLED_P(CLASS) \
|
||
- ((TARGET_THUMB && (CLASS) == LO_REGS) \
|
||
+ ((TARGET_THUMB1 && (CLASS) == LO_REGS) \
|
||
|| (CLASS) == CC_REG)
|
||
|
||
/* The class value for index registers, and the one for base regs. */
|
||
@@ -1245,7 +1287,7 @@
|
||
when addressing quantities in QI or HI mode; if we don't know the
|
||
mode, then we must be conservative. */
|
||
#define MODE_BASE_REG_CLASS(MODE) \
|
||
- (TARGET_32BIT ? CORE_REGS : \
|
||
+ (TARGET_32BIT ? (TARGET_THUMB2 ? LO_REGS : CORE_REGS) : \
|
||
(((MODE) == SImode) ? BASE_REGS : LO_REGS))
|
||
|
||
/* For Thumb we can not support SP+reg addressing, so we return LO_REGS
|
||
@@ -1346,6 +1388,9 @@
|
||
else if (TARGET_MAVERICK && TARGET_HARD_FLOAT) \
|
||
/* Need to be careful, -256 is not a valid offset. */ \
|
||
low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); \
|
||
+ else if (TARGET_REALLY_IWMMXT && MODE == SImode) \
|
||
+ /* Need to be careful, -1024 is not a valid offset. */ \
|
||
+ low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff); \
|
||
else if (MODE == SImode \
|
||
|| (MODE == SFmode && TARGET_SOFT_FLOAT) \
|
||
|| ((MODE == HImode || MODE == QImode) && ! arm_arch4)) \
|
||
@@ -1416,13 +1461,17 @@
|
||
/* If defined, gives a class of registers that cannot be used as the
|
||
operand of a SUBREG that changes the mode of the object illegally. */
|
||
|
||
-/* Moves between FPA_REGS and GENERAL_REGS are two memory insns. */
|
||
+/* Moves between FPA_REGS and GENERAL_REGS are two memory insns.
|
||
+ Moves between VFP_REGS and GENERAL_REGS are a single insn, but
|
||
+ it is typically more expensive than a single memory access. We set
|
||
+ the cost to less than two memory accesses so that floating
|
||
+ point to integer conversion does not go through memory. */
|
||
#define REGISTER_MOVE_COST(MODE, FROM, TO) \
|
||
(TARGET_32BIT ? \
|
||
((FROM) == FPA_REGS && (TO) != FPA_REGS ? 20 : \
|
||
(FROM) != FPA_REGS && (TO) == FPA_REGS ? 20 : \
|
||
- IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 10 : \
|
||
- !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 10 : \
|
||
+ IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 15 : \
|
||
+ !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 15 : \
|
||
(FROM) == IWMMXT_REGS && (TO) != IWMMXT_REGS ? 4 : \
|
||
(FROM) != IWMMXT_REGS && (TO) == IWMMXT_REGS ? 4 : \
|
||
(FROM) == IWMMXT_GR_REGS || (TO) == IWMMXT_GR_REGS ? 20 : \
|
||
@@ -1491,9 +1540,10 @@
|
||
|
||
/* Define how to find the value returned by a library function
|
||
assuming the value has mode MODE. */
|
||
-#define LIBCALL_VALUE(MODE) \
|
||
- (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \
|
||
- && GET_MODE_CLASS (MODE) == MODE_FLOAT \
|
||
+#define LIBCALL_VALUE(MODE) \
|
||
+ (TARGET_AAPCS_BASED ? aapcs_libcall_value (MODE) \
|
||
+ : (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \
|
||
+ && GET_MODE_CLASS (MODE) == MODE_FLOAT) \
|
||
? gen_rtx_REG (MODE, FIRST_FPA_REGNUM) \
|
||
: TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK \
|
||
&& GET_MODE_CLASS (MODE) == MODE_FLOAT \
|
||
@@ -1502,22 +1552,16 @@
|
||
? gen_rtx_REG (MODE, FIRST_IWMMXT_REGNUM) \
|
||
: gen_rtx_REG (MODE, ARG_REGISTER (1)))
|
||
|
||
-/* Define how to find the value returned by a function.
|
||
- VALTYPE is the data type of the value (as a tree).
|
||
- If the precise function being called is known, FUNC is its FUNCTION_DECL;
|
||
- otherwise, FUNC is 0. */
|
||
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
|
||
- arm_function_value (VALTYPE, FUNC);
|
||
-
|
||
-/* 1 if N is a possible register number for a function value.
|
||
- On the ARM, only r0 and f0 can return results. */
|
||
-/* On a Cirrus chip, mvf0 can return results. */
|
||
-#define FUNCTION_VALUE_REGNO_P(REGNO) \
|
||
- ((REGNO) == ARG_REGISTER (1) \
|
||
- || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \
|
||
- && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \
|
||
- || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \
|
||
- || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \
|
||
+/* 1 if REGNO is a possible register number for a function value. */
|
||
+#define FUNCTION_VALUE_REGNO_P(REGNO) \
|
||
+ ((REGNO) == ARG_REGISTER (1) \
|
||
+ || (TARGET_AAPCS_BASED && TARGET_32BIT \
|
||
+ && TARGET_VFP && TARGET_HARD_FLOAT \
|
||
+ && (REGNO) == FIRST_VFP_REGNUM) \
|
||
+ || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \
|
||
+ && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \
|
||
+ || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \
|
||
+ || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \
|
||
&& TARGET_HARD_FLOAT_ABI && TARGET_FPA))
|
||
|
||
/* Amount of memory needed for an untyped call to save all possible return
|
||
@@ -1617,9 +1661,27 @@
|
||
that is in text_section. */
|
||
extern GTY(()) rtx thumb_call_via_label[14];
|
||
|
||
+/* The number of potential ways of assigning to a co-processor. */
|
||
+#define ARM_NUM_COPROC_SLOTS 1
|
||
+
|
||
+/* Enumeration of procedure calling standard variants. We don't really
|
||
+ support all of these yet. */
|
||
+enum arm_pcs
|
||
+{
|
||
+ ARM_PCS_AAPCS, /* Base standard AAPCS. */
|
||
+ ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */
|
||
+ ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */
|
||
+ /* This must be the last AAPCS variant. */
|
||
+ ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */
|
||
+ ARM_PCS_ATPCS, /* ATPCS. */
|
||
+ ARM_PCS_APCS, /* APCS (legacy Linux etc). */
|
||
+ ARM_PCS_UNKNOWN
|
||
+};
|
||
+
|
||
+/* We can't define this inside a generator file because it needs enum
|
||
+ machine_mode. */
|
||
/* A C type for declaring a variable that is used as the first argument of
|
||
- `FUNCTION_ARG' and other related values. For some target machines, the
|
||
- type `int' suffices and can hold the number of bytes of argument so far. */
|
||
+ `FUNCTION_ARG' and other related values. */
|
||
typedef struct
|
||
{
|
||
/* This is the number of registers of arguments scanned so far. */
|
||
@@ -1628,9 +1690,33 @@
|
||
int iwmmxt_nregs;
|
||
int named_count;
|
||
int nargs;
|
||
- int can_split;
|
||
+ /* Which procedure call variant to use for this call. */
|
||
+ enum arm_pcs pcs_variant;
|
||
+
|
||
+ /* AAPCS related state tracking. */
|
||
+ int aapcs_arg_processed; /* No need to lay out this argument again. */
|
||
+ int aapcs_cprc_slot; /* Index of co-processor rules to handle
|
||
+ this argument, or -1 if using core
|
||
+ registers. */
|
||
+ int aapcs_ncrn;
|
||
+ int aapcs_next_ncrn;
|
||
+ rtx aapcs_reg; /* Register assigned to this argument. */
|
||
+ int aapcs_partial; /* How many bytes are passed in regs (if
|
||
+ split between core regs and stack.
|
||
+ Zero otherwise. */
|
||
+ int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS];
|
||
+ int can_split; /* Argument can be split between core regs
|
||
+ and the stack. */
|
||
+ /* Private data for tracking VFP register allocation */
|
||
+ unsigned aapcs_vfp_regs_free;
|
||
+ unsigned aapcs_vfp_reg_alloc;
|
||
+ int aapcs_vfp_rcount;
|
||
+ /* Can't include insn-modes.h because this header is needed before we
|
||
+ generate it. */
|
||
+ int /* enum machine_mode */ aapcs_vfp_rmode;
|
||
} CUMULATIVE_ARGS;
|
||
|
||
+
|
||
/* Define where to put the arguments to a function.
|
||
Value is zero to push the argument on the stack,
|
||
or a hard register in which to store the argument.
|
||
@@ -1674,13 +1760,7 @@
|
||
of mode MODE and data type TYPE.
|
||
(TYPE is null for libcalls where that information may not be available.) */
|
||
#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
|
||
- (CUM).nargs += 1; \
|
||
- if (arm_vector_mode_supported_p (MODE) \
|
||
- && (CUM).named_count > (CUM).nargs \
|
||
- && TARGET_IWMMXT_ABI) \
|
||
- (CUM).iwmmxt_nregs += 1; \
|
||
- else \
|
||
- (CUM).nregs += ARM_NUM_REGS2 (MODE, TYPE)
|
||
+ arm_function_arg_advance (&(CUM), (MODE), (TYPE), (NAMED))
|
||
|
||
/* If defined, a C expression that gives the alignment boundary, in bits, of an
|
||
argument with the specified mode and type. If it is not defined,
|
||
@@ -1692,9 +1772,11 @@
|
||
|
||
/* 1 if N is a possible register number for function argument passing.
|
||
On the ARM, r0-r3 are used to pass args. */
|
||
-#define FUNCTION_ARG_REGNO_P(REGNO) \
|
||
- (IN_RANGE ((REGNO), 0, 3) \
|
||
- || (TARGET_IWMMXT_ABI \
|
||
+#define FUNCTION_ARG_REGNO_P(REGNO) \
|
||
+ (IN_RANGE ((REGNO), 0, 3) \
|
||
+ || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \
|
||
+ && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \
|
||
+ || (TARGET_IWMMXT_ABI \
|
||
&& IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9)))
|
||
|
||
|
||
@@ -2324,7 +2406,8 @@
|
||
/* Try to generate sequences that don't involve branches, we can then use
|
||
conditional instructions */
|
||
#define BRANCH_COST(speed_p, predictable_p) \
|
||
- (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
|
||
+ (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
|
||
+ : (optimize > 0 ? 2 : 0))
|
||
|
||
/* Position Independent Code. */
|
||
/* We decide which register to use based on the compilation options and
|
||
@@ -2392,6 +2475,7 @@
|
||
|
||
/* The arm5 clz instruction returns 32. */
|
||
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
|
||
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
|
||
|
||
#undef ASM_APP_OFF
|
||
#define ASM_APP_OFF (TARGET_THUMB1 ? "\t.code\t16\n" : \
|
||
@@ -2404,6 +2488,19 @@
|
||
if (TARGET_ARM) \
|
||
asm_fprintf (STREAM,"\tstmfd\t%r!,{%r}\n", \
|
||
STACK_POINTER_REGNUM, REGNO); \
|
||
+ else if (TARGET_THUMB1 \
|
||
+ && (REGNO) == STATIC_CHAIN_REGNUM) \
|
||
+ { \
|
||
+ /* We can't push STATIC_CHAIN_REGNUM (r12) directly with Thumb-1.
|
||
+ We know that ASM_OUTPUT_REG_PUSH will be matched with
|
||
+ ASM_OUTPUT_REG_POP, and that r7 isn't used by the function
|
||
+ profiler, so we can use it as a scratch reg. WARNING: This isn't
|
||
+ safe in the general case! It may be sensitive to future changes
|
||
+ in final.c:profile_function. */ \
|
||
+ asm_fprintf (STREAM, "\tpush\t{r7}\n"); \
|
||
+ asm_fprintf (STREAM, "\tmov\tr7, %r\n", REGNO);\
|
||
+ asm_fprintf (STREAM, "\tpush\t{r7}\n"); \
|
||
+ } \
|
||
else \
|
||
asm_fprintf (STREAM, "\tpush {%r}\n", REGNO); \
|
||
} while (0)
|
||
@@ -2415,6 +2512,14 @@
|
||
if (TARGET_ARM) \
|
||
asm_fprintf (STREAM, "\tldmfd\t%r!,{%r}\n", \
|
||
STACK_POINTER_REGNUM, REGNO); \
|
||
+ else if (TARGET_THUMB1 \
|
||
+ && (REGNO) == STATIC_CHAIN_REGNUM) \
|
||
+ { \
|
||
+ /* See comment in ASM_OUTPUT_REG_PUSH. */ \
|
||
+ asm_fprintf (STREAM, "\tpop\t{r7}\n"); \
|
||
+ asm_fprintf (STREAM, "\tmov\t%r, r7\n", REGNO);\
|
||
+ asm_fprintf (STREAM, "\tpop\t{r7}\n"); \
|
||
+ } \
|
||
else \
|
||
asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \
|
||
} while (0)
|
||
--- a/gcc/config/arm/arm.md
|
||
+++ b/gcc/config/arm/arm.md
|
||
@@ -99,6 +99,7 @@
|
||
; correctly for PIC usage.
|
||
(UNSPEC_GOTSYM_OFF 24) ; The offset of the start of the the GOT from a
|
||
; a given symbolic address.
|
||
+ (UNSPEC_RBIT 25) ; rbit operation.
|
||
]
|
||
)
|
||
|
||
@@ -131,6 +132,8 @@
|
||
(VUNSPEC_WCMP_EQ 12) ; Used by the iWMMXt WCMPEQ instructions
|
||
(VUNSPEC_WCMP_GTU 13) ; Used by the iWMMXt WCMPGTU instructions
|
||
(VUNSPEC_WCMP_GT 14) ; Used by the iwMMXT WCMPGT instructions
|
||
+ (VUNSPEC_ALIGN16 15) ; Used to force 16-byte alignment.
|
||
+ (VUNSPEC_ALIGN32 16) ; Used to force 32-byte alignment.
|
||
(VUNSPEC_EH_RETURN 20); Use to override the return address for exception
|
||
; handling.
|
||
]
|
||
@@ -144,6 +147,10 @@
|
||
; patterns that share the same RTL in both ARM and Thumb code.
|
||
(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code")))
|
||
|
||
+; FIX_JANUS is set to 'yes' when compiling for Janus2, it causes to
|
||
+; add a nop after shifts, in order to work around a Janus2 bug
|
||
+(define_attr "fix_janus" "no,yes" (const (symbol_ref "janus2_code")))
|
||
+
|
||
; IS_STRONGARM is set to 'yes' when compiling for StrongARM, it affects
|
||
; scheduling decisions for the load unit and the multiplier.
|
||
(define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_tune_strongarm")))
|
||
@@ -158,7 +165,7 @@
|
||
; Floating Point Unit. If we only have floating point emulation, then there
|
||
; is no point in scheduling the floating point insns. (Well, for best
|
||
; performance we should try and group them together).
|
||
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon"
|
||
+(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
|
||
(const (symbol_ref "arm_fpu_attr")))
|
||
|
||
; LENGTH of an instruction (in bytes)
|
||
@@ -185,7 +192,7 @@
|
||
;; scheduling information.
|
||
|
||
(define_attr "insn"
|
||
- "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
|
||
+ "mov,mvn,and,orr,eor,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
|
||
(const_string "other"))
|
||
|
||
; TYPE attribute is used to detect floating point instructions which, if
|
||
@@ -251,8 +258,6 @@
|
||
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
|
||
|
||
;; Classification of NEON instructions for scheduling purposes.
|
||
-;; Do not set this attribute and the "type" attribute together in
|
||
-;; any one instruction pattern.
|
||
(define_attr "neon_type"
|
||
"neon_int_1,\
|
||
neon_int_2,\
|
||
@@ -415,7 +420,7 @@
|
||
|
||
(define_attr "generic_sched" "yes,no"
|
||
(const (if_then_else
|
||
- (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9")
|
||
+ (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9,marvell_f")
|
||
(eq_attr "tune_cortexr4" "yes"))
|
||
(const_string "no")
|
||
(const_string "yes"))))
|
||
@@ -423,7 +428,7 @@
|
||
(define_attr "generic_vfp" "yes,no"
|
||
(const (if_then_else
|
||
(and (eq_attr "fpu" "vfp")
|
||
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9")
|
||
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9,marvell_f")
|
||
(eq_attr "tune_cortexr4" "no"))
|
||
(const_string "yes")
|
||
(const_string "no"))))
|
||
@@ -437,6 +442,8 @@
|
||
(include "cortex-a9.md")
|
||
(include "cortex-r4.md")
|
||
(include "cortex-r4f.md")
|
||
+(include "marvell-f.md")
|
||
+(include "marvell-f-vfp.md")
|
||
(include "vfp11.md")
|
||
|
||
|
||
@@ -472,9 +479,9 @@
|
||
if (TARGET_THUMB1)
|
||
{
|
||
if (GET_CODE (operands[1]) != REG)
|
||
- operands[1] = force_reg (SImode, operands[1]);
|
||
+ operands[1] = force_reg (DImode, operands[1]);
|
||
if (GET_CODE (operands[2]) != REG)
|
||
- operands[2] = force_reg (SImode, operands[2]);
|
||
+ operands[2] = force_reg (DImode, operands[2]);
|
||
}
|
||
"
|
||
)
|
||
@@ -620,10 +627,11 @@
|
||
sub%?\\t%0, %1, #%n2
|
||
sub%?\\t%0, %1, #%n2
|
||
#"
|
||
- "TARGET_32BIT &&
|
||
- GET_CODE (operands[2]) == CONST_INT
|
||
+ "TARGET_32BIT
|
||
+ && GET_CODE (operands[2]) == CONST_INT
|
||
&& !(const_ok_for_arm (INTVAL (operands[2]))
|
||
- || const_ok_for_arm (-INTVAL (operands[2])))"
|
||
+ || const_ok_for_arm (-INTVAL (operands[2])))
|
||
+ && (reload_completed || !arm_eliminable_register (operands[1]))"
|
||
[(clobber (const_int 0))]
|
||
"
|
||
arm_split_constant (PLUS, SImode, curr_insn,
|
||
@@ -639,10 +647,10 @@
|
||
;; register. Trying to reload it will always fail catastrophically,
|
||
;; so never allow those alternatives to match if reloading is needed.
|
||
|
||
-(define_insn "*thumb1_addsi3"
|
||
- [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,!k")
|
||
- (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,!k,!k")
|
||
- (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,!M,!O")))]
|
||
+(define_insn_and_split "*thumb1_addsi3"
|
||
+ [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,!k,l,l")
|
||
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,!k,!k,0,l")
|
||
+ (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,!M,!O,Pa,Pb")))]
|
||
"TARGET_THUMB1"
|
||
"*
|
||
static const char * const asms[] =
|
||
@@ -653,7 +661,9 @@
|
||
\"add\\t%0, %0, %2\",
|
||
\"add\\t%0, %0, %2\",
|
||
\"add\\t%0, %1, %2\",
|
||
- \"add\\t%0, %1, %2\"
|
||
+ \"add\\t%0, %1, %2\",
|
||
+ \"#\",
|
||
+ \"#\"
|
||
};
|
||
if ((which_alternative == 2 || which_alternative == 6)
|
||
&& GET_CODE (operands[2]) == CONST_INT
|
||
@@ -661,7 +671,22 @@
|
||
return \"sub\\t%0, %1, #%n2\";
|
||
return asms[which_alternative];
|
||
"
|
||
- [(set_attr "length" "2")]
|
||
+ "&& reload_completed && CONST_INT_P (operands[2])
|
||
+ && operands[1] != stack_pointer_rtx
|
||
+ && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255)"
|
||
+ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
|
||
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))]
|
||
+ {
|
||
+ HOST_WIDE_INT offset = INTVAL (operands[2]);
|
||
+ if (offset > 255)
|
||
+ offset = 255;
|
||
+ else if (offset < -255)
|
||
+ offset = -255;
|
||
+
|
||
+ operands[3] = GEN_INT (offset);
|
||
+ operands[2] = GEN_INT (INTVAL (operands[2]) - offset);
|
||
+ }
|
||
+ [(set_attr "length" "2,2,2,2,2,2,2,4,4")]
|
||
)
|
||
|
||
;; Reloading and elimination of the frame pointer can
|
||
@@ -854,7 +879,11 @@
|
||
[(set_attr "conds" "use")
|
||
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*addsi3_carryin_alt1"
|
||
@@ -938,7 +967,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(plus:DF (match_operand:DF 1 "s_register_operand" "")
|
||
(match_operand:DF 2 "arm_float_add_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
"
|
||
if (TARGET_MAVERICK
|
||
&& !cirrus_fp_register (operands[2], DFmode))
|
||
@@ -1176,7 +1205,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "")
|
||
(match_operand:DF 2 "arm_float_rhs_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
"
|
||
if (TARGET_MAVERICK)
|
||
{
|
||
@@ -1332,6 +1361,49 @@
|
||
(set_attr "predicable" "yes")]
|
||
)
|
||
|
||
+; The combiner cannot combine the first and last insns in the
|
||
+; following sequence because of the intervening insn, so help the
|
||
+; combiner with this splitter. The combiner does attempt to split
|
||
+; this particular combination but does not know this exact split.
|
||
+; Note that the combiner puts the constant at the outermost operation
|
||
+; as a part of canonicalization.
|
||
+;
|
||
+; mul r3, r2, r1
|
||
+; <add/sub> r3, r3, <constant>
|
||
+; add r3, r3, r4
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (match_operator:SI 1 "plusminus_operator"
|
||
+ [(plus:SI (mult:SI (match_operand:SI 2 "s_register_operand" "")
|
||
+ (match_operand:SI 3 "s_register_operand" ""))
|
||
+ (match_operand:SI 4 "s_register_operand" ""))
|
||
+ (match_operand:SI 5 "arm_immediate_operand" "")]))]
|
||
+ "TARGET_32BIT"
|
||
+ [(set (match_dup 0)
|
||
+ (plus:SI (mult:SI (match_dup 2) (match_dup 3))
|
||
+ (match_dup 4)))
|
||
+ (set (match_dup 0)
|
||
+ (match_op_dup:SI 1 [(match_dup 0) (match_dup 5)]))]
|
||
+ "")
|
||
+
|
||
+; Likewise for MLS. MLS is available only on select architectures.
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (match_operator:SI 1 "plusminus_operator"
|
||
+ [(minus:SI (match_operand:SI 2 "s_register_operand" "")
|
||
+ (mult:SI (match_operand:SI 3 "s_register_operand" "")
|
||
+ (match_operand:SI 4 "s_register_operand" "")))
|
||
+ (match_operand:SI 5 "arm_immediate_operand" "")]))]
|
||
+ "TARGET_32BIT && arm_arch_thumb2"
|
||
+ [(set (match_dup 0)
|
||
+ (minus:SI (match_dup 2)
|
||
+ (mult:SI (match_dup 3) (match_dup 4))))
|
||
+ (set (match_dup 0)
|
||
+ (match_op_dup:SI 1 [(match_dup 0) (match_dup 5)]))]
|
||
+ "")
|
||
+
|
||
(define_insn "*mulsi3addsi_compare0"
|
||
[(set (reg:CC_NOOV CC_REGNUM)
|
||
(compare:CC_NOOV
|
||
@@ -1713,7 +1785,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(mult:DF (match_operand:DF 1 "s_register_operand" "")
|
||
(match_operand:DF 2 "arm_float_rhs_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
"
|
||
if (TARGET_MAVERICK
|
||
&& !cirrus_fp_register (operands[2], DFmode))
|
||
@@ -1733,7 +1805,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(div:DF (match_operand:DF 1 "arm_float_rhs_operand" "")
|
||
(match_operand:DF 2 "arm_float_rhs_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
|
||
"")
|
||
|
||
;; Modulo insns
|
||
@@ -1960,6 +2032,7 @@
|
||
DONE;
|
||
"
|
||
[(set_attr "length" "4,4,16")
|
||
+ (set_attr "insn" "and")
|
||
(set_attr "predicable" "yes")]
|
||
)
|
||
|
||
@@ -1969,7 +2042,8 @@
|
||
(match_operand:SI 2 "register_operand" "l")))]
|
||
"TARGET_THUMB1"
|
||
"and\\t%0, %0, %2"
|
||
- [(set_attr "length" "2")]
|
||
+ [(set_attr "length" "2")
|
||
+ (set_attr "insn" "and")]
|
||
)
|
||
|
||
(define_insn "*andsi3_compare0"
|
||
@@ -1984,7 +2058,8 @@
|
||
"@
|
||
and%.\\t%0, %1, %2
|
||
bic%.\\t%0, %1, #%B2"
|
||
- [(set_attr "conds" "set")]
|
||
+ [(set_attr "conds" "set")
|
||
+ (set_attr "insn" "and,*")]
|
||
)
|
||
|
||
(define_insn "*andsi3_compare0_scratch"
|
||
@@ -2280,7 +2355,7 @@
|
||
}
|
||
}
|
||
|
||
- target = operands[0];
|
||
+ target = copy_rtx (operands[0]);
|
||
/* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
|
||
subreg as the final target. */
|
||
if (GET_CODE (target) == SUBREG)
|
||
@@ -2528,7 +2603,11 @@
|
||
(set_attr "shift" "2")
|
||
(set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*andsi_notsi_si_compare0"
|
||
@@ -2576,6 +2655,7 @@
|
||
orr%?\\t%Q0, %Q1, %2
|
||
#"
|
||
[(set_attr "length" "4,8")
|
||
+ (set_attr "insn" "orr")
|
||
(set_attr "predicable" "yes")]
|
||
)
|
||
|
||
@@ -2638,7 +2718,8 @@
|
||
(match_operand:SI 2 "register_operand" "l")))]
|
||
"TARGET_THUMB1"
|
||
"orr\\t%0, %0, %2"
|
||
- [(set_attr "length" "2")]
|
||
+ [(set_attr "length" "2")
|
||
+ (set_attr "insn" "orr")]
|
||
)
|
||
|
||
(define_peephole2
|
||
@@ -2663,7 +2744,8 @@
|
||
(ior:SI (match_dup 1) (match_dup 2)))]
|
||
"TARGET_32BIT"
|
||
"orr%.\\t%0, %1, %2"
|
||
- [(set_attr "conds" "set")]
|
||
+ [(set_attr "conds" "set")
|
||
+ (set_attr "insn" "orr")]
|
||
)
|
||
|
||
(define_insn "*iorsi3_compare0_scratch"
|
||
@@ -2674,7 +2756,8 @@
|
||
(clobber (match_scratch:SI 0 "=r"))]
|
||
"TARGET_32BIT"
|
||
"orr%.\\t%0, %1, %2"
|
||
- [(set_attr "conds" "set")]
|
||
+ [(set_attr "conds" "set")
|
||
+ (set_attr "insn" "orr")]
|
||
)
|
||
|
||
(define_insn "xordi3"
|
||
@@ -2697,7 +2780,8 @@
|
||
eor%?\\t%Q0, %Q1, %2
|
||
#"
|
||
[(set_attr "length" "4,8")
|
||
- (set_attr "predicable" "yes")]
|
||
+ (set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "eor")]
|
||
)
|
||
|
||
(define_insn "*xordi_sesidi_di"
|
||
@@ -2728,7 +2812,8 @@
|
||
(match_operand:SI 2 "arm_rhs_operand" "rI")))]
|
||
"TARGET_32BIT"
|
||
"eor%?\\t%0, %1, %2"
|
||
- [(set_attr "predicable" "yes")]
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "eor")]
|
||
)
|
||
|
||
(define_insn "*thumb1_xorsi3"
|
||
@@ -2737,7 +2822,8 @@
|
||
(match_operand:SI 2 "register_operand" "l")))]
|
||
"TARGET_THUMB1"
|
||
"eor\\t%0, %0, %2"
|
||
- [(set_attr "length" "2")]
|
||
+ [(set_attr "length" "2")
|
||
+ (set_attr "insn" "eor")]
|
||
)
|
||
|
||
(define_insn "*xorsi3_compare0"
|
||
@@ -2749,7 +2835,8 @@
|
||
(xor:SI (match_dup 1) (match_dup 2)))]
|
||
"TARGET_32BIT"
|
||
"eor%.\\t%0, %1, %2"
|
||
- [(set_attr "conds" "set")]
|
||
+ [(set_attr "conds" "set")
|
||
+ (set_attr "insn" "eor")]
|
||
)
|
||
|
||
(define_insn "*xorsi3_compare0_scratch"
|
||
@@ -2906,7 +2993,7 @@
|
||
(smax:SI (match_operand:SI 1 "s_register_operand" "")
|
||
(match_operand:SI 2 "arm_rhs_operand" "")))
|
||
(clobber (reg:CC CC_REGNUM))])]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"
|
||
if (operands[2] == const0_rtx || operands[2] == constm1_rtx)
|
||
{
|
||
@@ -2933,7 +3020,8 @@
|
||
(const_int -1)))]
|
||
"TARGET_32BIT"
|
||
"orr%?\\t%0, %1, %1, asr #31"
|
||
- [(set_attr "predicable" "yes")]
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "orr")]
|
||
)
|
||
|
||
(define_insn "*arm_smax_insn"
|
||
@@ -2941,7 +3029,7 @@
|
||
(smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
|
||
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_COND_EXEC"
|
||
"@
|
||
cmp\\t%1, %2\;movlt\\t%0, %2
|
||
cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
|
||
@@ -2955,7 +3043,7 @@
|
||
(smin:SI (match_operand:SI 1 "s_register_operand" "")
|
||
(match_operand:SI 2 "arm_rhs_operand" "")))
|
||
(clobber (reg:CC CC_REGNUM))])]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"
|
||
if (operands[2] == const0_rtx)
|
||
{
|
||
@@ -2973,7 +3061,8 @@
|
||
(const_int 0)))]
|
||
"TARGET_32BIT"
|
||
"and%?\\t%0, %1, %1, asr #31"
|
||
- [(set_attr "predicable" "yes")]
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "and")]
|
||
)
|
||
|
||
(define_insn "*arm_smin_insn"
|
||
@@ -2981,7 +3070,7 @@
|
||
(smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
|
||
(match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_COND_EXEC"
|
||
"@
|
||
cmp\\t%1, %2\;movge\\t%0, %2
|
||
cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
|
||
@@ -2995,7 +3084,7 @@
|
||
(umax:SI (match_operand:SI 1 "s_register_operand" "")
|
||
(match_operand:SI 2 "arm_rhs_operand" "")))
|
||
(clobber (reg:CC CC_REGNUM))])]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
""
|
||
)
|
||
|
||
@@ -3004,7 +3093,7 @@
|
||
(umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
|
||
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_COND_EXEC"
|
||
"@
|
||
cmp\\t%1, %2\;movcc\\t%0, %2
|
||
cmp\\t%1, %2\;movcs\\t%0, %1
|
||
@@ -3019,7 +3108,7 @@
|
||
(umin:SI (match_operand:SI 1 "s_register_operand" "")
|
||
(match_operand:SI 2 "arm_rhs_operand" "")))
|
||
(clobber (reg:CC CC_REGNUM))])]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
""
|
||
)
|
||
|
||
@@ -3028,7 +3117,7 @@
|
||
(umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
|
||
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_COND_EXEC"
|
||
"@
|
||
cmp\\t%1, %2\;movcs\\t%0, %2
|
||
cmp\\t%1, %2\;movcc\\t%0, %1
|
||
@@ -3043,7 +3132,7 @@
|
||
[(match_operand:SI 1 "s_register_operand" "r")
|
||
(match_operand:SI 2 "s_register_operand" "r")]))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"*
|
||
operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
|
||
operands[1], operands[2]);
|
||
@@ -3163,11 +3252,23 @@
|
||
[(set (match_operand:SI 0 "register_operand" "=l,l")
|
||
(ashift:SI (match_operand:SI 1 "register_operand" "l,0")
|
||
(match_operand:SI 2 "nonmemory_operand" "N,l")))]
|
||
- "TARGET_THUMB1"
|
||
+ "TARGET_THUMB1 && !janus2_code"
|
||
"lsl\\t%0, %1, %2"
|
||
[(set_attr "length" "2")]
|
||
)
|
||
|
||
+(define_insn "*thumb1_ashlsi3_janus2"
|
||
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
|
||
+ (ashift:SI (match_operand:SI 1 "register_operand" "l,0")
|
||
+ (match_operand:SI 2 "nonmemory_operand" "N,l")))]
|
||
+ "TARGET_THUMB1 && janus2_code"
|
||
+ "@
|
||
+ lsl\\t%0, %1, %2
|
||
+ lsl\\t%0, %1, %2\;nop"
|
||
+ [(set_attr "length" "2,4")]
|
||
+)
|
||
+
|
||
+
|
||
(define_expand "ashrdi3"
|
||
[(set (match_operand:DI 0 "s_register_operand" "")
|
||
(ashiftrt:DI (match_operand:DI 1 "s_register_operand" "")
|
||
@@ -3200,6 +3301,7 @@
|
||
"TARGET_32BIT"
|
||
"movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
|
||
[(set_attr "conds" "clob")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -3219,11 +3321,22 @@
|
||
[(set (match_operand:SI 0 "register_operand" "=l,l")
|
||
(ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0")
|
||
(match_operand:SI 2 "nonmemory_operand" "N,l")))]
|
||
- "TARGET_THUMB1"
|
||
+ "TARGET_THUMB1 && !janus2_code"
|
||
"asr\\t%0, %1, %2"
|
||
[(set_attr "length" "2")]
|
||
)
|
||
|
||
+(define_insn "*thumb1_ashrsi3_janus2"
|
||
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
|
||
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0")
|
||
+ (match_operand:SI 2 "nonmemory_operand" "N,l")))]
|
||
+ "TARGET_THUMB1 && janus2_code"
|
||
+ "@
|
||
+ asr\\t%0, %1, %2
|
||
+ asr\\t%0, %1, %2\;nop"
|
||
+ [(set_attr "length" "2,4")]
|
||
+)
|
||
+
|
||
(define_expand "lshrdi3"
|
||
[(set (match_operand:DI 0 "s_register_operand" "")
|
||
(lshiftrt:DI (match_operand:DI 1 "s_register_operand" "")
|
||
@@ -3256,6 +3369,7 @@
|
||
"TARGET_32BIT"
|
||
"movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
|
||
[(set_attr "conds" "clob")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -3278,11 +3392,22 @@
|
||
[(set (match_operand:SI 0 "register_operand" "=l,l")
|
||
(lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0")
|
||
(match_operand:SI 2 "nonmemory_operand" "N,l")))]
|
||
- "TARGET_THUMB1"
|
||
+ "TARGET_THUMB1 && !janus2_code"
|
||
"lsr\\t%0, %1, %2"
|
||
[(set_attr "length" "2")]
|
||
)
|
||
|
||
+(define_insn "*thumb1_lshrsi3_janus2"
|
||
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
|
||
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0")
|
||
+ (match_operand:SI 2 "nonmemory_operand" "N,l")))]
|
||
+ "TARGET_THUMB1 && janus2_code"
|
||
+ "@
|
||
+ lsr\\t%0, %1, %2
|
||
+ lsr\\t%0, %1, %2; nop"
|
||
+ [(set_attr "length" "2,4")]
|
||
+)
|
||
+
|
||
(define_expand "rotlsi3"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(rotatert:SI (match_operand:SI 1 "s_register_operand" "")
|
||
@@ -3324,11 +3449,20 @@
|
||
[(set (match_operand:SI 0 "register_operand" "=l")
|
||
(rotatert:SI (match_operand:SI 1 "register_operand" "0")
|
||
(match_operand:SI 2 "register_operand" "l")))]
|
||
- "TARGET_THUMB1"
|
||
+ "TARGET_THUMB1 && !janus2_code"
|
||
"ror\\t%0, %0, %2"
|
||
[(set_attr "length" "2")]
|
||
)
|
||
|
||
+(define_insn "*thumb1_rotrsi3_janus2"
|
||
+ [(set (match_operand:SI 0 "register_operand" "=l")
|
||
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
|
||
+ (match_operand:SI 2 "register_operand" "l")))]
|
||
+ "TARGET_THUMB1 && janus2_code"
|
||
+ "ror\\t%0, %0, %2; nop"
|
||
+ [(set_attr "length" "4")]
|
||
+)
|
||
+
|
||
(define_insn "*arm_shiftsi3"
|
||
[(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
(match_operator:SI 3 "shift_operator"
|
||
@@ -3340,7 +3474,11 @@
|
||
(set_attr "shift" "1")
|
||
(set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*shiftsi3_compare0"
|
||
@@ -3357,7 +3495,11 @@
|
||
(set_attr "shift" "1")
|
||
(set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*shiftsi3_compare0_scratch"
|
||
@@ -3370,7 +3512,11 @@
|
||
"TARGET_32BIT"
|
||
"* return arm_output_shift(operands, 1);"
|
||
[(set_attr "conds" "set")
|
||
- (set_attr "shift" "1")]
|
||
+ (set_attr "shift" "1")
|
||
+ (set (attr "length") (if_then_else (and (match_operand 2 "s_register_operand" "")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*arm_notsi_shiftsi"
|
||
@@ -3382,9 +3528,14 @@
|
||
"mvn%?\\t%0, %1%S3"
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "shift" "1")
|
||
+ (set_attr "insn" "mvn")
|
||
(set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*arm_notsi_shiftsi_compare0"
|
||
@@ -3399,9 +3550,14 @@
|
||
"mvn%.\\t%0, %1%S3"
|
||
[(set_attr "conds" "set")
|
||
(set_attr "shift" "1")
|
||
+ (set_attr "insn" "mvn")
|
||
(set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*arm_not_shiftsi_compare0_scratch"
|
||
@@ -3415,9 +3571,14 @@
|
||
"mvn%.\\t%0, %1%S3"
|
||
[(set_attr "conds" "set")
|
||
(set_attr "shift" "1")
|
||
+ (set_attr "insn" "mvn")
|
||
(set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
;; We don't really have extzv, but defining this using shifts helps
|
||
@@ -3550,12 +3711,12 @@
|
||
(define_expand "negdf2"
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(neg:DF (match_operand:DF 1 "s_register_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
|
||
"")
|
||
|
||
;; abssi2 doesn't really clobber the condition codes if a different register
|
||
;; is being set. To keep things simple, assume during rtl manipulations that
|
||
-;; it does, but tell the final scan operator the truth. Similarly for
|
||
+;; it does, and the splitter will eliminate it. Similarly for
|
||
;; (neg (abs...))
|
||
|
||
(define_expand "abssi2"
|
||
@@ -3567,22 +3728,28 @@
|
||
"
|
||
if (TARGET_THUMB1)
|
||
operands[2] = gen_rtx_SCRATCH (SImode);
|
||
+ else if (TARGET_NO_SINGLE_COND_EXEC)
|
||
+ {
|
||
+ emit_insn(gen_rtx_SET(VOIDmode, operands[0],
|
||
+ gen_rtx_ABS(SImode, operands[1])));
|
||
+ DONE;
|
||
+ }
|
||
else
|
||
operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
|
||
")
|
||
|
||
(define_insn "*arm_abssi2"
|
||
- [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
|
||
- (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
+ (abs:SI (match_operand:SI 1 "s_register_operand" "r")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
- "@
|
||
- cmp\\t%0, #0\;rsblt\\t%0, %0, #0
|
||
- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
|
||
- [(set_attr "conds" "clob,*")
|
||
- (set_attr "shift" "1")
|
||
+ "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC"
|
||
+ "#"
|
||
+ [(set_attr "shift" "1")
|
||
;; predicable can't be set based on the variant, so left as no
|
||
- (set_attr "length" "8")]
|
||
+ (set (attr "length")
|
||
+ (if_then_else (eq_attr "is_thumb" "yes")
|
||
+ (const_int 10)
|
||
+ (const_int 8)))]
|
||
)
|
||
|
||
(define_insn_and_split "*thumb1_abssi2"
|
||
@@ -3600,17 +3767,17 @@
|
||
)
|
||
|
||
(define_insn "*arm_neg_abssi2"
|
||
- [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
|
||
- (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r"))))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
- "@
|
||
- cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
|
||
- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
|
||
- [(set_attr "conds" "clob,*")
|
||
- (set_attr "shift" "1")
|
||
+ "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC"
|
||
+ "#"
|
||
+ [(set_attr "shift" "1")
|
||
;; predicable can't be set based on the variant, so left as no
|
||
- (set_attr "length" "8")]
|
||
+ (set (attr "length")
|
||
+ (if_then_else (eq_attr "is_thumb" "yes")
|
||
+ (const_int 10)
|
||
+ (const_int 8)))]
|
||
)
|
||
|
||
(define_insn_and_split "*thumb1_neg_abssi2"
|
||
@@ -3627,6 +3794,93 @@
|
||
[(set_attr "length" "6")]
|
||
)
|
||
|
||
+;; Simplified version for when avoiding conditional execution
|
||
+(define_insn "*arm_nocond_abssi2"
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
|
||
+ (abs:SI (match_operand:SI 1 "s_register_operand" "r")))]
|
||
+ "TARGET_32BIT && TARGET_NO_SINGLE_COND_EXEC"
|
||
+ "#"
|
||
+ [(set_attr "shift" "1")
|
||
+ (set_attr "length" "8")
|
||
+ (set_attr "predicable" "yes")]
|
||
+)
|
||
+
|
||
+(define_insn "*arm_nocond_neg_abssi2"
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
|
||
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r"))))]
|
||
+ "TARGET_32BIT && TARGET_NO_SINGLE_COND_EXEC"
|
||
+ "#"
|
||
+ [(set_attr "shift" "1")
|
||
+ (set_attr "length" "8")
|
||
+ (set_attr "predicable" "yes")]
|
||
+)
|
||
+
|
||
+;; Splitters for ABS patterns.
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (abs:SI (match_operand:SI 1 "s_register_operand" "")))
|
||
+ (clobber (reg:CC CC_REGNUM))]
|
||
+ "TARGET_32BIT && reload_completed && rtx_equal_p(operands[0], operands[1])"
|
||
+ [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0)))
|
||
+ (cond_exec (lt (reg:CC CC_REGNUM) (const_int 0))
|
||
+ (set (match_dup 0) (neg:SI (match_dup 1))))]
|
||
+)
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" ""))))
|
||
+ (clobber (reg:CC CC_REGNUM))]
|
||
+ "TARGET_32BIT && reload_completed && rtx_equal_p(operands[0], operands[1])"
|
||
+ [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0)))
|
||
+ (cond_exec (gt (reg:CC CC_REGNUM) (const_int 0))
|
||
+ (set (match_dup 0) (neg:SI (match_dup 1))))]
|
||
+)
|
||
+
|
||
+;; GCC does not add/remove clobbers when matching splitters, so we need
|
||
+;; variants with and without the CC clobber.
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (abs:SI (match_operand:SI 1 "s_register_operand" "")))]
|
||
+ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])"
|
||
+ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31))
|
||
+ (match_dup 1)))
|
||
+ (set (match_dup 0) (minus:SI (match_dup 0)
|
||
+ (ashiftrt:SI (match_dup 1) (const_int 31))))]
|
||
+)
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (abs:SI (match_operand:SI 1 "s_register_operand" "")))
|
||
+ (clobber (reg:CC CC_REGNUM))]
|
||
+ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])"
|
||
+ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31))
|
||
+ (match_dup 1)))
|
||
+ (set (match_dup 0) (minus:SI (match_dup 0)
|
||
+ (ashiftrt:SI (match_dup 1) (const_int 31))))]
|
||
+)
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" ""))))]
|
||
+ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])"
|
||
+ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31))
|
||
+ (match_dup 1)))
|
||
+ (set (match_dup 0) (minus:SI (ashiftrt:SI (match_dup 1) (const_int 31))
|
||
+ (match_dup 0)))]
|
||
+)
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" ""))))
|
||
+ (clobber (reg:CC CC_REGNUM))]
|
||
+ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])"
|
||
+ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31))
|
||
+ (match_dup 1)))
|
||
+ (set (match_dup 0) (minus:SI (ashiftrt:SI (match_dup 1) (const_int 31))
|
||
+ (match_dup 0)))]
|
||
+)
|
||
+
|
||
(define_expand "abssf2"
|
||
[(set (match_operand:SF 0 "s_register_operand" "")
|
||
(abs:SF (match_operand:SF 1 "s_register_operand" "")))]
|
||
@@ -3636,7 +3890,7 @@
|
||
(define_expand "absdf2"
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(abs:DF (match_operand:DF 1 "s_register_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
"")
|
||
|
||
(define_expand "sqrtsf2"
|
||
@@ -3648,7 +3902,7 @@
|
||
(define_expand "sqrtdf2"
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(sqrt:DF (match_operand:DF 1 "s_register_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
|
||
"")
|
||
|
||
(define_insn_and_split "one_cmpldi2"
|
||
@@ -3682,7 +3936,8 @@
|
||
(not:SI (match_operand:SI 1 "s_register_operand" "r")))]
|
||
"TARGET_32BIT"
|
||
"mvn%?\\t%0, %1"
|
||
- [(set_attr "predicable" "yes")]
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "mvn")]
|
||
)
|
||
|
||
(define_insn "*thumb1_one_cmplsi2"
|
||
@@ -3690,7 +3945,8 @@
|
||
(not:SI (match_operand:SI 1 "register_operand" "l")))]
|
||
"TARGET_THUMB1"
|
||
"mvn\\t%0, %1"
|
||
- [(set_attr "length" "2")]
|
||
+ [(set_attr "length" "2")
|
||
+ (set_attr "insn" "mvn")]
|
||
)
|
||
|
||
(define_insn "*notsi_compare0"
|
||
@@ -3701,7 +3957,8 @@
|
||
(not:SI (match_dup 1)))]
|
||
"TARGET_32BIT"
|
||
"mvn%.\\t%0, %1"
|
||
- [(set_attr "conds" "set")]
|
||
+ [(set_attr "conds" "set")
|
||
+ (set_attr "insn" "mvn")]
|
||
)
|
||
|
||
(define_insn "*notsi_compare0_scratch"
|
||
@@ -3711,11 +3968,40 @@
|
||
(clobber (match_scratch:SI 0 "=r"))]
|
||
"TARGET_32BIT"
|
||
"mvn%.\\t%0, %1"
|
||
- [(set_attr "conds" "set")]
|
||
+ [(set_attr "conds" "set")
|
||
+ (set_attr "insn" "mvn")]
|
||
)
|
||
|
||
;; Fixed <--> Floating conversion insns
|
||
|
||
+(define_expand "floatsihf2"
|
||
+ [(set (match_operand:HF 0 "general_operand" "")
|
||
+ (float:HF (match_operand:SI 1 "general_operand" "")))]
|
||
+ "TARGET_EITHER"
|
||
+ "
|
||
+ {
|
||
+ rtx op1 = gen_reg_rtx (SFmode);
|
||
+ expand_float (op1, operands[1], 0);
|
||
+ op1 = convert_to_mode (HFmode, op1, 0);
|
||
+ emit_move_insn (operands[0], op1);
|
||
+ DONE;
|
||
+ }"
|
||
+)
|
||
+
|
||
+(define_expand "floatdihf2"
|
||
+ [(set (match_operand:HF 0 "general_operand" "")
|
||
+ (float:HF (match_operand:DI 1 "general_operand" "")))]
|
||
+ "TARGET_EITHER"
|
||
+ "
|
||
+ {
|
||
+ rtx op1 = gen_reg_rtx (SFmode);
|
||
+ expand_float (op1, operands[1], 0);
|
||
+ op1 = convert_to_mode (HFmode, op1, 0);
|
||
+ emit_move_insn (operands[0], op1);
|
||
+ DONE;
|
||
+ }"
|
||
+)
|
||
+
|
||
(define_expand "floatsisf2"
|
||
[(set (match_operand:SF 0 "s_register_operand" "")
|
||
(float:SF (match_operand:SI 1 "s_register_operand" "")))]
|
||
@@ -3731,7 +4017,7 @@
|
||
(define_expand "floatsidf2"
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(float:DF (match_operand:SI 1 "s_register_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
"
|
||
if (TARGET_MAVERICK)
|
||
{
|
||
@@ -3740,6 +4026,30 @@
|
||
}
|
||
")
|
||
|
||
+(define_expand "fix_trunchfsi2"
|
||
+ [(set (match_operand:SI 0 "general_operand" "")
|
||
+ (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))]
|
||
+ "TARGET_EITHER"
|
||
+ "
|
||
+ {
|
||
+ rtx op1 = convert_to_mode (SFmode, operands[1], 0);
|
||
+ expand_fix (operands[0], op1, 0);
|
||
+ DONE;
|
||
+ }"
|
||
+)
|
||
+
|
||
+(define_expand "fix_trunchfdi2"
|
||
+ [(set (match_operand:DI 0 "general_operand" "")
|
||
+ (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))]
|
||
+ "TARGET_EITHER"
|
||
+ "
|
||
+ {
|
||
+ rtx op1 = convert_to_mode (SFmode, operands[1], 0);
|
||
+ expand_fix (operands[0], op1, 0);
|
||
+ DONE;
|
||
+ }"
|
||
+)
|
||
+
|
||
(define_expand "fix_truncsfsi2"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))]
|
||
@@ -3759,7 +4069,7 @@
|
||
(define_expand "fix_truncdfsi2"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" ""))))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
"
|
||
if (TARGET_MAVERICK)
|
||
{
|
||
@@ -3776,9 +4086,25 @@
|
||
[(set (match_operand:SF 0 "s_register_operand" "")
|
||
(float_truncate:SF
|
||
(match_operand:DF 1 "s_register_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
""
|
||
)
|
||
+
|
||
+/* DFmode -> HFmode conversions have to go through SFmode. */
|
||
+(define_expand "truncdfhf2"
|
||
+ [(set (match_operand:HF 0 "general_operand" "")
|
||
+ (float_truncate:HF
|
||
+ (match_operand:DF 1 "general_operand" "")))]
|
||
+ "TARGET_EITHER"
|
||
+ "
|
||
+ {
|
||
+ rtx op1;
|
||
+ op1 = convert_to_mode (SFmode, operands[1], 0);
|
||
+ op1 = convert_to_mode (HFmode, op1, 0);
|
||
+ emit_move_insn (operands[0], op1);
|
||
+ DONE;
|
||
+ }"
|
||
+)
|
||
|
||
;; Zero and sign extension instructions.
|
||
|
||
@@ -3800,6 +4126,7 @@
|
||
return \"mov%?\\t%R0, #0\";
|
||
"
|
||
[(set_attr "length" "8")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "predicable" "yes")]
|
||
)
|
||
|
||
@@ -3843,6 +4170,7 @@
|
||
"
|
||
[(set_attr "length" "8")
|
||
(set_attr "shift" "1")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "predicable" "yes")]
|
||
)
|
||
|
||
@@ -4123,6 +4451,28 @@
|
||
""
|
||
)
|
||
|
||
+(define_code_iterator ior_xor [ior xor])
|
||
+
|
||
+(define_split
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (ior_xor:SI (and:SI (ashift:SI
|
||
+ (match_operand:SI 1 "s_register_operand" "")
|
||
+ (match_operand:SI 2 "const_int_operand" ""))
|
||
+ (match_operand:SI 3 "const_int_operand" ""))
|
||
+ (zero_extend:SI
|
||
+ (match_operator 5 "subreg_lowpart_operator"
|
||
+ [(match_operand:SI 4 "s_register_operand" "")]))))]
|
||
+ "TARGET_32BIT
|
||
+ && (INTVAL (operands[3])
|
||
+ == (GET_MODE_MASK (GET_MODE (operands[5]))
|
||
+ & (GET_MODE_MASK (GET_MODE (operands[5]))
|
||
+ << (INTVAL (operands[2])))))"
|
||
+ [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2))
|
||
+ (match_dup 4)))
|
||
+ (set (match_dup 0) (zero_extend:SI (match_dup 5)))]
|
||
+ "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);"
|
||
+)
|
||
+
|
||
(define_insn "*compareqi_eq0"
|
||
[(set (reg:CC_Z CC_REGNUM)
|
||
(compare:CC_Z (match_operand:QI 0 "s_register_operand" "r")
|
||
@@ -4639,9 +4989,24 @@
|
||
(define_expand "extendsfdf2"
|
||
[(set (match_operand:DF 0 "s_register_operand" "")
|
||
(float_extend:DF (match_operand:SF 1 "s_register_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
""
|
||
)
|
||
+
|
||
+/* HFmode -> DFmode conversions have to go through SFmode. */
|
||
+(define_expand "extendhfdf2"
|
||
+ [(set (match_operand:DF 0 "general_operand" "")
|
||
+ (float_extend:DF (match_operand:HF 1 "general_operand" "")))]
|
||
+ "TARGET_EITHER"
|
||
+ "
|
||
+ {
|
||
+ rtx op1;
|
||
+ op1 = convert_to_mode (SFmode, operands[1], 0);
|
||
+ op1 = convert_to_mode (DFmode, op1, 0);
|
||
+ emit_insn (gen_movdf (operands[0], op1));
|
||
+ DONE;
|
||
+ }"
|
||
+)
|
||
|
||
;; Move insns (including loads and stores)

@@ -4877,6 +5242,7 @@
}"
[(set_attr "length" "4,4,6,2,2,6,4,4")
(set_attr "type" "*,*,*,load2,store2,load2,store2,*")
+ (set_attr "insn" "*,mov,*,*,*,*,*,mov")
(set_attr "pool_range" "*,*,*,*,*,1020,*,*")]
)

@@ -4903,14 +5269,6 @@
optimize && can_create_pseudo_p ());
DONE;
}
-
- if (TARGET_USE_MOVT && !target_word_relocations
- && GET_CODE (operands[1]) == SYMBOL_REF
- && !flag_pic && !arm_tls_referenced_p (operands[1]))
- {
- arm_emit_movpair (operands[0], operands[1]);
- DONE;
- }
}
else /* TARGET_THUMB1... */
{
@@ -4984,18 +5342,9 @@
(set_attr "length" "4")]
)

-(define_insn "*arm_movw"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
- (high:SI (match_operand:SI 1 "general_operand" "i")))]
- "TARGET_32BIT"
- "movw%?\t%0, #:lower16:%c1"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4")]
-)
-
(define_insn "*arm_movsi_insn"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m")
- (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk"))]
+ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))]
"TARGET_ARM && ! TARGET_IWMMXT
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
&& ( register_operand (operands[0], SImode)
@@ -5008,6 +5357,7 @@
ldr%?\\t%0, %1
str%?\\t%1, %0"
[(set_attr "type" "*,*,*,*,load1,store1")
+ (set_attr "insn" "mov,mov,mvn,mov,*,*")
(set_attr "predicable" "yes")
(set_attr "pool_range" "*,*,*,*,4096,*")
(set_attr "neg_pool_range" "*,*,*,*,4084,*")]
@@ -5027,6 +5377,19 @@
"
)

+(define_split
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ "TARGET_32BIT
+ && TARGET_USE_MOVT && GET_CODE (operands[1]) == SYMBOL_REF
+ && !flag_pic && !target_word_relocations
+ && !arm_tls_referenced_p (operands[1])"
+ [(clobber (const_int 0))]
+{
+ arm_emit_movpair (operands[0], operands[1]);
+ DONE;
+})
+
(define_insn "*thumb1_movsi_insn"
[(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*lhk")
(match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*lhk"))]
@@ -5065,7 +5428,7 @@
(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))]
"
{
- unsigned HOST_WIDE_INT val = INTVAL (operands[1]);
+ unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu;
unsigned HOST_WIDE_INT mask = 0xff;
int i;

@@ -5627,6 +5990,7 @@
ldr%(h%)\\t%0, %1\\t%@ movhi"
[(set_attr "type" "*,*,store1,load1")
(set_attr "predicable" "yes")
+ (set_attr "insn" "mov,mvn,*,*")
(set_attr "pool_range" "*,*,*,256")
(set_attr "neg_pool_range" "*,*,*,244")]
)
@@ -5638,7 +6002,8 @@
"@
mov%?\\t%0, %1\\t%@ movhi
mvn%?\\t%0, #%B1\\t%@ movhi"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "mov,mvn")]
)

(define_expand "thumb_movhi_clobber"
@@ -5769,6 +6134,7 @@
ldr%(b%)\\t%0, %1
str%(b%)\\t%1, %0"
[(set_attr "type" "*,*,load1,store1")
+ (set_attr "insn" "mov,mvn,*,*")
(set_attr "predicable" "yes")]
)

@@ -5787,9 +6153,111 @@
mov\\t%0, %1"
[(set_attr "length" "2")
(set_attr "type" "*,load1,store1,*,*,*")
+ (set_attr "insn" "*,*,*,mov,mov,mov")
(set_attr "pool_range" "*,32,*,*,*,*")]
)

+;; HFmode moves
+(define_expand "movhf"
+ [(set (match_operand:HF 0 "general_operand" "")
+ (match_operand:HF 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT)
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (HFmode, operands[1]);
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (HFmode, operands[1]);
+ }
+ }
+ "
+)
+
+(define_insn "*arm32_movhf"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r")
+ (match_operand:HF 1 "general_operand" " m,r,r,F"))]
+ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16)
+ && ( s_register_operand (operands[0], HFmode)
+ || s_register_operand (operands[1], HFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: /* ARM register from memory */
+ return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\";
+ case 1: /* memory from ARM register */
+ return \"str%(h%)\\t%1, %0\\t%@ __fp16\";
+ case 2: /* ARM register from ARM register */
+ return \"mov%?\\t%0, %1\\t%@ __fp16\";
+ case 3: /* ARM register from constant */
+ {
+ REAL_VALUE_TYPE r;
+ long bits;
+ rtx ops[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ bits = real_to_target (NULL, &r, HFmode);
+ ops[0] = operands[0];
+ ops[1] = GEN_INT (bits);
+ ops[2] = GEN_INT (bits & 0xff00);
+ ops[3] = GEN_INT (bits & 0x00ff);
+
+ if (arm_arch_thumb2)
+ output_asm_insn (\"movw%?\\t%0, %1\", ops);
+ else
+ output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops);
+ return \"\";
+ }
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "conds" "unconditional")
+ (set_attr "type" "load1,store1,*,*")
+ (set_attr "length" "4,4,4,8")
+ (set_attr "predicable" "yes")
+ ]
+)
+
+(define_insn "*thumb1_movhf"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h")
+ (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))]
+ "TARGET_THUMB1
+ && ( s_register_operand (operands[0], HFmode)
+ || s_register_operand (operands[1], HFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 1:
+ {
+ rtx addr;
+ gcc_assert (GET_CODE(operands[1]) == MEM);
+ addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) == LABEL_REF
+ || (GET_CODE (addr) == CONST
+ && GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT))
+ {
+ /* Constant pool entry. */
+ return \"ldr\\t%0, %1\";
+ }
+ return \"ldrh\\t%0, %1\";
+ }
+ case 2: return \"strh\\t%1, %0\";
+ default: return \"mov\\t%0, %1\";
+ }
+ "
+ [(set_attr "length" "2")
+ (set_attr "type" "*,load1,store1,*,*")
+ (set_attr "pool_range" "*,1020,*,*,*")]
+)
+
(define_expand "movsf"
|
||
[(set (match_operand:SF 0 "general_operand" "")
|
||
(match_operand:SF 1 "general_operand" ""))]
|
||
@@ -5842,6 +6310,7 @@
|
||
[(set_attr "length" "4,4,4")
|
||
(set_attr "predicable" "yes")
|
||
(set_attr "type" "*,load1,store1")
|
||
+ (set_attr "insn" "mov,*,*")
|
||
(set_attr "pool_range" "*,4096,*")
|
||
(set_attr "neg_pool_range" "*,4084,*")]
|
||
)
|
||
@@ -6297,7 +6766,7 @@
|
||
(match_operand:BLK 1 "general_operand" "")
|
||
(match_operand:SI 2 "const_int_operand" "")
|
||
(match_operand:SI 3 "const_int_operand" "")]
|
||
- "TARGET_EITHER"
|
||
+ "TARGET_EITHER && !low_irq_latency"
|
||
"
|
||
if (TARGET_32BIT)
|
||
{
|
||
@@ -7476,7 +7945,7 @@
|
||
(define_expand "cmpdf"
|
||
[(match_operand:DF 0 "s_register_operand" "")
|
||
(match_operand:DF 1 "arm_float_compare_operand" "")]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
|
||
"
|
||
arm_compare_op0 = operands[0];
|
||
arm_compare_op1 = operands[1];
|
||
@@ -7507,7 +7976,11 @@
|
||
(set_attr "shift" "1")
|
||
(set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*arm_cmpsi_shiftsi_swp"
|
||
@@ -7522,7 +7995,11 @@
|
||
(set_attr "shift" "1")
|
||
(set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*arm_cmpsi_negshiftsi_si"
|
||
@@ -7537,7 +8014,11 @@
|
||
[(set_attr "conds" "set")
|
||
(set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
;; Cirrus SF compare instruction
|
||
@@ -7879,77 +8360,77 @@
|
||
(define_expand "seq"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(eq:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (EQ, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sne"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(ne:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (NE, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sgt"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(gt:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (GT, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sle"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(le:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (LE, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sge"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(ge:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (GE, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "slt"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(lt:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (LT, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sgtu"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(gtu:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (GTU, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sleu"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(leu:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (LEU, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sgeu"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(geu:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (GEU, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sltu"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(ltu:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (LTU, arm_compare_op0, arm_compare_op1);"
|
||
)
|
||
|
||
(define_expand "sunordered"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(unordered:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (UNORDERED, arm_compare_op0,
|
||
arm_compare_op1);"
|
||
)
|
||
@@ -7957,7 +8438,7 @@
|
||
(define_expand "sordered"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(ordered:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (ORDERED, arm_compare_op0,
|
||
arm_compare_op1);"
|
||
)
|
||
@@ -7965,7 +8446,7 @@
|
||
(define_expand "sungt"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(ungt:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (UNGT, arm_compare_op0,
|
||
arm_compare_op1);"
|
||
)
|
||
@@ -7973,7 +8454,7 @@
|
||
(define_expand "sunge"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(unge:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (UNGE, arm_compare_op0,
|
||
arm_compare_op1);"
|
||
)
|
||
@@ -7981,7 +8462,7 @@
|
||
(define_expand "sunlt"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(unlt:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (UNLT, arm_compare_op0,
|
||
arm_compare_op1);"
|
||
)
|
||
@@ -7989,7 +8470,7 @@
|
||
(define_expand "sunle"
|
||
[(set (match_operand:SI 0 "s_register_operand" "")
|
||
(unle:SI (match_dup 1) (const_int 0)))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC"
|
||
"operands[1] = arm_gen_compare_reg (UNLE, arm_compare_op0,
|
||
arm_compare_op1);"
|
||
)
|
||
@@ -8018,6 +8499,7 @@
|
||
"TARGET_ARM"
|
||
"mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -8028,6 +8510,7 @@
|
||
"TARGET_ARM"
|
||
"mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -8038,6 +8521,7 @@
|
||
"TARGET_ARM"
|
||
"mov%D1\\t%0, #0\;mvn%d1\\t%0, #1"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -8241,7 +8725,7 @@
|
||
(if_then_else:SI (match_operand 1 "arm_comparison_operator" "")
|
||
(match_operand:SI 2 "arm_not_operand" "")
|
||
(match_operand:SI 3 "arm_not_operand" "")))]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_COND_EXEC"
|
||
"
|
||
{
|
||
enum rtx_code code = GET_CODE (operands[1]);
|
||
@@ -8260,7 +8744,7 @@
|
||
(if_then_else:SF (match_operand 1 "arm_comparison_operator" "")
|
||
(match_operand:SF 2 "s_register_operand" "")
|
||
(match_operand:SF 3 "nonmemory_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_NO_COND_EXEC"
|
||
"
|
||
{
|
||
enum rtx_code code = GET_CODE (operands[1]);
|
||
@@ -8285,7 +8769,7 @@
|
||
(if_then_else:DF (match_operand 1 "arm_comparison_operator" "")
|
||
(match_operand:DF 2 "s_register_operand" "")
|
||
(match_operand:DF 3 "arm_float_add_operand" "")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE) && !TARGET_NO_COND_EXEC"
|
||
"
|
||
{
|
||
enum rtx_code code = GET_CODE (operands[1]);
|
||
@@ -8317,7 +8801,8 @@
|
||
mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
|
||
mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
|
||
[(set_attr "length" "4,4,4,4,8,8,8,8")
|
||
- (set_attr "conds" "use")]
|
||
+ (set_attr "conds" "use")
|
||
+ (set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")]
|
||
)
|
||
|
||
(define_insn "*movsfcc_soft_insn"
|
||
@@ -8330,7 +8815,8 @@
|
||
"@
|
||
mov%D3\\t%0, %2
|
||
mov%d3\\t%0, %1"
|
||
- [(set_attr "conds" "use")]
|
||
+ [(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mov")]
|
||
)
|
||
|
||
|
||
@@ -8733,7 +9219,7 @@
|
||
[(match_operand 1 "cc_register" "") (const_int 0)])
|
||
(return)
|
||
(pc)))]
|
||
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
|
||
+ "TARGET_ARM && USE_RETURN_INSN (TRUE) && !TARGET_NO_COND_EXEC"
|
||
"*
|
||
{
|
||
if (arm_ccfsm_state == 2)
|
||
@@ -8754,7 +9240,7 @@
|
||
[(match_operand 1 "cc_register" "") (const_int 0)])
|
||
(pc)
|
||
(return)))]
|
||
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
|
||
+ "TARGET_ARM && USE_RETURN_INSN (TRUE) && !TARGET_NO_COND_EXEC"
|
||
"*
|
||
{
|
||
if (arm_ccfsm_state == 2)
|
||
@@ -9072,7 +9558,11 @@
|
||
(set_attr "shift" "4")
|
||
(set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_split
|
||
@@ -9110,7 +9600,11 @@
|
||
(set_attr "shift" "4")
|
||
(set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*arith_shiftsi_compare0_scratch"
|
||
@@ -9128,7 +9622,11 @@
|
||
(set_attr "shift" "4")
|
||
(set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*sub_shiftsi"
|
||
@@ -9143,7 +9641,11 @@
|
||
(set_attr "shift" "3")
|
||
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*sub_shiftsi_compare0"
|
||
@@ -9163,7 +9665,11 @@
|
||
(set_attr "shift" "3")
|
||
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
(define_insn "*sub_shiftsi_compare0_scratch"
|
||
@@ -9181,7 +9687,11 @@
|
||
(set_attr "shift" "3")
|
||
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4)))]
|
||
)
|
||
|
||
|
||
@@ -9194,6 +9704,7 @@
|
||
"TARGET_ARM"
|
||
"mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -9207,6 +9718,7 @@
|
||
orr%d2\\t%0, %1, #1
|
||
mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "orr")
|
||
(set_attr "length" "4,8")]
|
||
)
|
||
|
||
@@ -9216,7 +9728,7 @@
|
||
[(match_operand:SI 2 "s_register_operand" "r,r")
|
||
(match_operand:SI 3 "arm_add_operand" "rI,L")]))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_COND_EXEC"
|
||
"*
|
||
if (operands[3] == const0_rtx)
|
||
{
|
||
@@ -9271,6 +9783,7 @@
|
||
return \"\";
|
||
"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mov")
|
||
(set_attr "length" "4,4,8")]
|
||
)
|
||
|
||
@@ -9282,7 +9795,7 @@
|
||
(match_operand:SI 3 "arm_rhs_operand" "rI,rI")])
|
||
(match_operand:SI 1 "s_register_operand" "0,?r")]))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"*
|
||
if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx)
|
||
return \"%i5\\t%0, %1, %2, lsr #31\";
|
||
@@ -9678,7 +10191,7 @@
|
||
(match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI")
|
||
(match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_COND_EXEC"
|
||
"*
|
||
if (GET_CODE (operands[5]) == LT
|
||
&& (operands[4] == const0_rtx))
|
||
@@ -9744,7 +10257,7 @@
|
||
(match_operand:SI 3 "arm_add_operand" "rIL,rIL"))
|
||
(match_operand:SI 1 "arm_rhs_operand" "0,?rI")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -9780,7 +10293,7 @@
|
||
(match_operand:SI 2 "s_register_operand" "r,r")
|
||
(match_operand:SI 3 "arm_add_operand" "rIL,rIL"))))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -9818,7 +10331,7 @@
|
||
[(match_operand:SI 3 "s_register_operand" "r")
|
||
(match_operand:SI 4 "arm_rhs_operand" "rI")])))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "12")]
|
||
@@ -9968,7 +10481,7 @@
|
||
(not:SI
|
||
(match_operand:SI 2 "s_register_operand" "r,r"))))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -9987,6 +10500,7 @@
|
||
mov%d4\\t%0, %1\;mvn%D4\\t%0, %2
|
||
mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mvn")
|
||
(set_attr "length" "4,8,8")]
|
||
)
|
||
|
||
@@ -10000,7 +10514,7 @@
|
||
(match_operand:SI 2 "s_register_operand" "r,r"))
|
||
(match_operand:SI 1 "arm_not_operand" "0,?rIK")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -10019,6 +10533,7 @@
|
||
mov%D4\\t%0, %1\;mvn%d4\\t%0, %2
|
||
mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mvn")
|
||
(set_attr "length" "4,8,8")]
|
||
)
|
||
|
||
@@ -10033,7 +10548,7 @@
|
||
(match_operand:SI 3 "arm_rhs_operand" "rM,rM")])
|
||
(match_operand:SI 1 "arm_not_operand" "0,?rIK")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -10055,10 +10570,23 @@
|
||
mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
|
||
[(set_attr "conds" "use")
|
||
(set_attr "shift" "2")
|
||
- (set_attr "length" "4,8,8")
|
||
+ (set_attr "insn" "mov")
|
||
(set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set_attr_alternative "length"
|
||
+ [(if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4))
|
||
+ (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 12)
|
||
+ (const_int 8))
|
||
+ (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 12)
|
||
+ (const_int 8))])]
|
||
)
|
||
|
||
(define_insn "*ifcompare_move_shift"
|
||
@@ -10072,7 +10600,7 @@
|
||
[(match_operand:SI 2 "s_register_operand" "r,r")
|
||
(match_operand:SI 3 "arm_rhs_operand" "rM,rM")])))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -10094,10 +10622,24 @@
|
||
mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
|
||
[(set_attr "conds" "use")
|
||
(set_attr "shift" "2")
|
||
- (set_attr "length" "4,8,8")
|
||
+ (set_attr "insn" "mov")
|
||
(set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set_attr_alternative "length"
|
||
+ [(if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 8)
|
||
+ (const_int 4))
|
||
+ (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 12)
|
||
+ (const_int 8))
|
||
+ (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 12)
|
||
+ (const_int 8))])
|
||
+ (set_attr "insn" "mov")]
|
||
)
|
||
|
||
(define_insn "*ifcompare_shift_shift"
|
||
@@ -10113,7 +10655,7 @@
|
||
[(match_operand:SI 3 "s_register_operand" "r")
|
||
(match_operand:SI 4 "arm_rhs_operand" "rM")])))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "12")]
|
||
@@ -10134,12 +10676,16 @@
|
||
"mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
|
||
[(set_attr "conds" "use")
|
||
(set_attr "shift" "1")
|
||
- (set_attr "length" "8")
|
||
+ (set_attr "insn" "mov")
|
||
(set (attr "type") (if_then_else
|
||
(and (match_operand 2 "const_int_operand" "")
|
||
(match_operand 4 "const_int_operand" ""))
|
||
(const_string "alu_shift")
|
||
- (const_string "alu_shift_reg")))]
|
||
+ (const_string "alu_shift_reg")))
|
||
+ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "fix_janus" "yes"))
|
||
+ (const_int 16)
|
||
+ (const_int 8)))]
|
||
)
|
||
|
||
(define_insn "*ifcompare_not_arith"
|
||
@@ -10153,7 +10699,7 @@
|
||
[(match_operand:SI 2 "s_register_operand" "r")
|
||
(match_operand:SI 3 "arm_rhs_operand" "rI")])))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "12")]
|
||
@@ -10171,6 +10717,7 @@
|
||
"TARGET_ARM"
|
||
"mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mvn")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -10185,7 +10732,7 @@
|
||
(match_operand:SI 3 "arm_rhs_operand" "rI")])
|
||
(not:SI (match_operand:SI 1 "s_register_operand" "r"))))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "12")]
|
||
@@ -10203,6 +10750,7 @@
|
||
"TARGET_ARM"
|
||
"mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mvn")
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
@@ -10215,7 +10763,7 @@
|
||
(neg:SI (match_operand:SI 2 "s_register_operand" "r,r"))
|
||
(match_operand:SI 1 "arm_not_operand" "0,?rIK")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -10246,7 +10794,7 @@
|
||
(match_operand:SI 1 "arm_not_operand" "0,?rIK")
|
||
(neg:SI (match_operand:SI 2 "s_register_operand" "r,r"))))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM"
|
||
+ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC"
|
||
"#"
|
||
[(set_attr "conds" "clob")
|
||
(set_attr "length" "8,12")]
|
||
@@ -10614,7 +11162,7 @@
|
||
(match_dup 0)
|
||
(match_operand 4 "" "")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM && reload_completed"
|
||
+ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC"
|
||
[(set (match_dup 5) (match_dup 6))
|
||
(cond_exec (match_dup 7)
|
||
(set (match_dup 0) (match_dup 4)))]
|
||
@@ -10642,7 +11190,7 @@
|
||
(match_operand 4 "" "")
|
||
(match_dup 0)))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM && reload_completed"
|
||
+ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC"
|
||
[(set (match_dup 5) (match_dup 6))
|
||
(cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)])
|
||
(set (match_dup 0) (match_dup 4)))]
|
||
@@ -10663,7 +11211,7 @@
|
||
(match_operand 4 "" "")
|
||
(match_operand 5 "" "")))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM && reload_completed"
|
||
+ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC"
|
||
[(set (match_dup 6) (match_dup 7))
|
||
(cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)])
|
||
(set (match_dup 0) (match_dup 4)))
|
||
@@ -10695,7 +11243,7 @@
|
||
(not:SI
|
||
(match_operand:SI 5 "s_register_operand" ""))))
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_ARM && reload_completed"
|
||
+ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC"
|
||
[(set (match_dup 6) (match_dup 7))
|
||
(cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)])
|
||
(set (match_dup 0) (match_dup 4)))
|
||
@@ -10730,6 +11278,7 @@
|
||
mvn%D4\\t%0, %2
|
||
mov%d4\\t%0, %1\;mvn%D4\\t%0, %2"
|
||
[(set_attr "conds" "use")
|
||
+ (set_attr "insn" "mvn")
|
||
(set_attr "length" "4,8")]
|
||
)
|
||
|
||
@@ -10864,6 +11413,24 @@
|
||
"
|
||
)
|
||
|
||
+(define_insn "align_16"
|
||
+ [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN16)]
|
||
+ "TARGET_EITHER"
|
||
+ "*
|
||
+ assemble_align (128);
|
||
+ return \"\";
|
||
+ "
|
||
+)
|
||
+
|
||
+(define_insn "align_32"
|
||
+ [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN32)]
|
||
+ "TARGET_EITHER"
|
||
+ "*
|
||
+ assemble_align (256);
|
||
+ return \"\";
|
||
+ "
|
||
+)
|
||
+
|
||
(define_insn "consttable_end"
|
||
[(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)]
|
||
"TARGET_EITHER"
|
||
@@ -10890,6 +11457,7 @@
|
||
"TARGET_THUMB1"
|
||
"*
|
||
making_const_table = TRUE;
|
||
+ gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT);
|
||
assemble_integer (operands[0], 2, BITS_PER_WORD, 1);
|
||
assemble_zeros (2);
|
||
return \"\";
|
||
@@ -10902,19 +11470,30 @@
|
||
"TARGET_EITHER"
|
||
"*
|
||
{
|
||
+ rtx x = operands[0];
|
||
making_const_table = TRUE;
|
||
- switch (GET_MODE_CLASS (GET_MODE (operands[0])))
|
||
+ switch (GET_MODE_CLASS (GET_MODE (x)))
|
||
{
|
||
case MODE_FLOAT:
|
||
- {
|
||
- REAL_VALUE_TYPE r;
|
||
- REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
|
||
- assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
|
||
- break;
|
||
- }
|
||
+ if (GET_MODE (x) == HFmode)
|
||
+ arm_emit_fp16_const (x);
|
||
+ else
|
||
+ {
|
||
+ REAL_VALUE_TYPE r;
|
||
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
|
||
+ assemble_real (r, GET_MODE (x), BITS_PER_WORD);
|
||
+ }
|
||
+ break;
|
||
default:
|
||
- assemble_integer (operands[0], 4, BITS_PER_WORD, 1);
|
||
- mark_symbol_refs_as_used (operands[0]);
|
||
+ /* XXX: Sometimes gcc does something really dumb and ends up with
|
||
+ a HIGH in a constant pool entry, usually because it's trying to
|
||
+ load into a VFP register. We know this will always be used in
|
||
+ combination with a LO_SUM which ignores the high bits, so just
|
||
+ strip off the HIGH. */
|
||
+ if (GET_CODE (x) == HIGH)
|
||
+ x = XEXP (x, 0);
|
||
+ assemble_integer (x, 4, BITS_PER_WORD, 1);
|
||
+ mark_symbol_refs_as_used (x);
|
||
break;
|
||
}
|
||
return \"\";
|
||
@@ -11008,6 +11587,28 @@
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "insn" "clz")])
|
||
|
||
+(define_insn "rbitsi2"
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))]
|
||
+ "TARGET_32BIT && arm_arch_thumb2"
|
||
+ "rbit%?\\t%0, %1"
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "clz")])
|
||
+
|
||
+(define_expand "ctzsi2"
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "")
|
||
+ (ctz:SI (match_operand:SI 1 "s_register_operand" "")))]
|
||
+ "TARGET_32BIT && arm_arch_thumb2"
|
||
+ "
|
||
+ {
|
||
+ rtx tmp = gen_reg_rtx (SImode);
|
||
+ emit_insn (gen_rbitsi2 (tmp, operands[1]));
|
||
+ emit_insn (gen_clzsi2 (operands[0], tmp));
|
||
+ }
|
||
+ DONE;
|
||
+ "
|
||
+)
|
||
+
|
||
;; V5E instructions.
|
||
|
||
(define_insn "prefetch"
|
||
@@ -11017,13 +11618,15 @@
|
||
"TARGET_32BIT && arm_arch5e"
|
||
"pld\\t%a0")
|
||
|
||
-;; General predication pattern
|
||
+;; General predication pattern.
|
||
+;; Conditional branches are available as both arm_cond_branch and
|
||
+;; predicated arm_jump, so it doesn't matter if we disable the latter.
|
||
|
||
(define_cond_exec
|
||
[(match_operator 0 "arm_comparison_operator"
|
||
[(match_operand 1 "cc_register" "")
|
||
(const_int 0)])]
|
||
- "TARGET_32BIT"
|
||
+ "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC"
|
||
""
|
||
)
|
||
|
||
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -25,6 +25,11 @@
FIXME What format is this? */
FLOAT_MODE (XF, 12, 0);

+/* Half-precision floating point */
+FLOAT_MODE (HF, 2, 0);
+ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
+ ? &arm_half_format : &ieee_half_format));
+
/* CCFPEmode should be used with floating inequalities,
CCFPmode should be used with floating equalities.
CC_NOOVmode should be used with SImode integer equalities.
@@ -62,6 +67,4 @@
INT_MODE (EI, 24);
INT_MODE (OI, 32);
INT_MODE (CI, 48);
-/* ??? This should actually have 512 bits but the precision only has 9
- bits. */
-FRACTIONAL_INT_MODE (XI, 511, 64);
+INT_MODE (XI, 64);
--- a/gcc/config/arm/arm_neon.h
|
||
+++ b/gcc/config/arm/arm_neon.h
|
||
@@ -61,7 +61,7 @@
|
||
typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16)));
|
||
typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16)));
|
||
|
||
-typedef __builtin_neon_sf float32_t;
|
||
+typedef float float32_t;
|
||
typedef __builtin_neon_poly8 poly8_t;
|
||
typedef __builtin_neon_poly16 poly16_t;
|
||
|
||
@@ -5085,7 +5085,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c);
|
||
+ return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c);
|
||
}
|
||
|
||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||
@@ -5151,7 +5151,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c);
|
||
+ return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c);
|
||
}
|
||
|
||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||
@@ -5283,7 +5283,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vdup_n_f32 (float32_t __a)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
|
||
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||
@@ -5349,7 +5349,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vdupq_n_f32 (float32_t __a)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
|
||
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||
@@ -5415,7 +5415,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vmov_n_f32 (float32_t __a)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
|
||
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||
@@ -5481,7 +5481,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vmovq_n_f32 (float32_t __a)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
|
||
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||
@@ -6591,7 +6591,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vmul_n_f32 (float32x2_t __a, float32_t __b)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3);
|
||
+ return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3);
|
||
}
|
||
|
||
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||
@@ -6621,7 +6621,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vmulq_n_f32 (float32x4_t __a, float32_t __b)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3);
|
||
+ return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3);
|
||
}
|
||
|
||
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||
@@ -6735,7 +6735,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3);
|
||
+ return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
|
||
}
|
||
|
||
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||
@@ -6765,7 +6765,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3);
|
||
+ return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
|
||
}
|
||
|
||
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||
@@ -6831,7 +6831,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3);
|
||
+ return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
|
||
}
|
||
|
||
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||
@@ -6861,7 +6861,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3);
|
||
+ return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
|
||
}
|
||
|
||
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||
@@ -7851,7 +7851,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vld1_f32 (const float32_t * __a)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vld1v2sf (__a);
|
||
+ return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||
@@ -7917,7 +7917,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vld1q_f32 (const float32_t * __a)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vld1v4sf (__a);
|
||
+ return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||
@@ -7977,7 +7977,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c);
|
||
+ return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c);
|
||
}
|
||
|
||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||
@@ -8043,7 +8043,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c);
|
||
+ return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c);
|
||
}
|
||
|
||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||
@@ -8109,7 +8109,7 @@
|
||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||
vld1_dup_f32 (const float32_t * __a)
|
||
{
|
||
- return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a);
|
||
+ return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||
@@ -8175,7 +8175,7 @@
|
||
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
|
||
vld1q_dup_f32 (const float32_t * __a)
|
||
{
|
||
- return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a);
|
||
+ return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a);
|
||
}
|
||
|
||
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
|
||
@@ -8247,7 +8247,7 @@
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
vst1_f32 (float32_t * __a, float32x2_t __b)
|
||
{
|
||
- __builtin_neon_vst1v2sf (__a, __b);
|
||
+ __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -8313,7 +8313,7 @@
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
vst1q_f32 (float32_t * __a, float32x4_t __b)
|
||
{
|
||
- __builtin_neon_vst1v4sf (__a, __b);
|
||
+ __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -8373,7 +8373,7 @@
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
|
||
{
|
||
- __builtin_neon_vst1_lanev2sf (__a, __b, __c);
|
||
+ __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -8439,7 +8439,7 @@
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
|
||
{
|
||
- __builtin_neon_vst1_lanev4sf (__a, __b, __c);
|
||
+ __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -8512,7 +8512,7 @@
|
||
vld2_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld2v2sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -8600,7 +8600,7 @@
|
||
vld2q_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld2v4sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -8676,7 +8676,7 @@
|
||
{
|
||
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
|
||
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c);
|
||
+ __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -8748,7 +8748,7 @@
|
||
{
|
||
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||
union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c);
|
||
+ __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -8807,7 +8807,7 @@
|
||
vld2_dup_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld2_dupv2sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -8892,7 +8892,7 @@
|
||
vst2_f32 (float32_t * __a, float32x2x2_t __b)
|
||
{
|
||
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
|
||
- __builtin_neon_vst2v2sf (__a, __bu.__o);
|
||
+ __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -8969,7 +8969,7 @@
|
||
vst2q_f32 (float32_t * __a, float32x4x2_t __b)
|
||
{
|
||
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||
- __builtin_neon_vst2v4sf (__a, __bu.__o);
|
||
+ __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -9032,7 +9032,7 @@
|
||
vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
|
||
{
|
||
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
|
||
- __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c);
|
||
+ __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -9088,7 +9088,7 @@
|
||
vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
|
||
{
|
||
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||
- __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c);
|
||
+ __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -9140,7 +9140,7 @@
|
||
vld3_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld3v2sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -9228,7 +9228,7 @@
|
||
vld3q_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld3v4sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -9304,7 +9304,7 @@
|
||
{
|
||
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
|
||
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c);
|
||
+ __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -9376,7 +9376,7 @@
|
||
{
|
||
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
|
||
union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c);
|
||
+ __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -9435,7 +9435,7 @@
|
||
vld3_dup_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld3_dupv2sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -9520,7 +9520,7 @@
|
||
vst3_f32 (float32_t * __a, float32x2x3_t __b)
|
||
{
|
||
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
|
||
- __builtin_neon_vst3v2sf (__a, __bu.__o);
|
||
+ __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -9597,7 +9597,7 @@
|
||
vst3q_f32 (float32_t * __a, float32x4x3_t __b)
|
||
{
|
||
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
|
||
- __builtin_neon_vst3v4sf (__a, __bu.__o);
|
||
+ __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -9660,7 +9660,7 @@
|
||
vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
|
||
{
|
||
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
|
||
- __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c);
|
||
+ __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -9716,7 +9716,7 @@
|
||
vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
|
||
{
|
||
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
|
||
- __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c);
|
||
+ __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -9768,7 +9768,7 @@
|
||
vld4_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld4v2sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -9856,7 +9856,7 @@
|
||
vld4q_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld4v4sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -9932,7 +9932,7 @@
|
||
{
|
||
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c);
|
||
+ __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -10004,7 +10004,7 @@
|
||
{
|
||
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
|
||
union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c);
|
||
+ __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -10063,7 +10063,7 @@
|
||
vld4_dup_f32 (const float32_t * __a)
|
||
{
|
||
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
|
||
- __rv.__o = __builtin_neon_vld4_dupv2sf (__a);
|
||
+ __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a);
|
||
return __rv.__i;
|
||
}
|
||
|
||
@@ -10148,7 +10148,7 @@
|
||
vst4_f32 (float32_t * __a, float32x2x4_t __b)
|
||
{
|
||
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||
- __builtin_neon_vst4v2sf (__a, __bu.__o);
|
||
+ __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -10225,7 +10225,7 @@
|
||
vst4q_f32 (float32_t * __a, float32x4x4_t __b)
|
||
{
|
||
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
|
||
- __builtin_neon_vst4v4sf (__a, __bu.__o);
|
||
+ __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -10288,7 +10288,7 @@
|
||
vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
|
||
{
|
||
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
|
||
- __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c);
|
||
+ __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
@@ -10344,7 +10344,7 @@
|
||
vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
|
||
{
|
||
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
|
||
- __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c);
|
||
+ __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
|
||
}
|
||
|
||
__extension__ static __inline void __attribute__ ((__always_inline__))
|
||
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -78,6 +78,10 @@
mfp=
Target RejectNegative Joined Undocumented Var(target_fpe_name)

+mfp16-format=
+Target RejectNegative Joined Var(target_fp16_format_name)
+Specify the __fp16 floating-point format
+
;; Now ignored.
mfpe
Target RejectNegative Mask(FPE) Undocumented
@@ -93,6 +97,10 @@
Target RejectNegative
Alias for -mfloat-abi=hard

+mfix-janus-2cc
+Target Report Mask(FIX_JANUS)
+Work around hardware errata for Avalent Janus 2CC cores.
+
mlittle-endian
Target Report RejectNegative InverseMask(BIG_END)
Assume target CPU is configured as little endian
@@ -101,6 +109,10 @@
Target Report Mask(LONG_CALLS)
Generate call insns as indirect calls, if necessary

+mmarvell-div
+Target Report Mask(MARVELL_DIV)
+Generate hardware integer division instructions supported by some Marvell cores.
+
mpic-register=
Target RejectNegative Joined Var(arm_pic_register_string)
Specify the register to be used for PIC addressing
@@ -157,6 +169,10 @@
Target Report Mask(NEON_VECTORIZE_QUAD)
Use Neon quad-word (rather than double-word) registers for vectorization

+mlow-irq-latency
+Target Report Var(low_irq_latency)
+Try to reduce interrupt latency of the generated code
+
mword-relocations
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
Only generate absolute relocations on word sized values.
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -88,7 +88,7 @@

extern int cirrus_memory_offset (rtx);
extern int arm_coproc_mem_operand (rtx, bool);
-extern int neon_vector_mem_operand (rtx, bool);
+extern int neon_vector_mem_operand (rtx, int);
extern int neon_struct_mem_operand (rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx);
@@ -144,6 +144,7 @@
extern int arm_debugger_arg_offset (int, rtx);
extern bool arm_is_long_call_p (tree);
extern int arm_emit_vector_const (FILE *, rtx);
+extern void arm_emit_fp16_const (rtx c);
extern const char * arm_output_load_gr (rtx *);
extern const char *vfp_output_fstmd (rtx *);
extern void arm_set_return_address (rtx, rtx);
@@ -154,13 +155,15 @@

#if defined TREE_CODE
extern rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
+extern void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
extern bool arm_pad_arg_upward (enum machine_mode, const_tree);
extern bool arm_pad_reg_upward (enum machine_mode, tree, int);
extern bool arm_needs_doubleword_align (enum machine_mode, tree);
-extern rtx arm_function_value(const_tree, const_tree);
#endif
extern int arm_apply_result_size (void);
+extern rtx aapcs_libcall_value (enum machine_mode);

#endif /* RTX_CODE */

--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune"
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1"
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,marvell_f,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa5,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1,cortexm0"
(const (symbol_ref "arm_tune")))
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -30,7 +30,7 @@

/* Section 4.1 of the AAPCS requires the use of VFP format. */
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
+#define FPUTYPE_DEFAULT "vfp"

/* TARGET_BIG_ENDIAN_DEFAULT is set in
config.gcc for big endian configurations. */
@@ -53,6 +53,8 @@

#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}"

+#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5|mcpu=cortex-a8|mcpu=cortex-a9:%{!r:--be8}}}"
+
/* Tell the assembler to build BPABI binaries. */
#undef SUBTARGET_EXTRA_ASM_SPEC
#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC
@@ -65,7 +67,7 @@
#define BPABI_LINK_SPEC \
"%{mbig-endian:-EB} %{mlittle-endian:-EL} " \
"%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \
- "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC
+ "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC

#undef LINK_SPEC
#define LINK_SPEC BPABI_LINK_SPEC
@@ -90,16 +92,22 @@
#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (muldi3, lmul)
#endif
#ifdef L_fixdfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz) \
+ extern DWtype __fixdfdi (DFtype) __attribute__((pcs("aapcs"))); \
+ extern UDWtype __fixunsdfdi (DFtype) __asm__("__aeabi_d2ulz") __attribute__((pcs("aapcs")));
#endif
#ifdef L_fixunsdfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz) \
+ extern UDWtype __fixunsdfdi (DFtype) __attribute__((pcs("aapcs")));
#endif
#ifdef L_fixsfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz) \
+ extern DWtype __fixsfdi (SFtype) __attribute__((pcs("aapcs"))); \
+ extern UDWtype __fixunssfdi (SFtype) __asm__("__aeabi_f2ulz") __attribute__((pcs("aapcs")));
#endif
#ifdef L_fixunssfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz) \
+ extern UDWtype __fixunssfdi (SFtype) __attribute__((pcs("aapcs")));
#endif
#ifdef L_floatdidf
#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdidf, l2d)
--- a/gcc/config/arm/bpabi.S
+++ b/gcc/config/arm/bpabi.S
@@ -64,20 +64,69 @@

#endif /* L_aeabi_ulcmp */

+.macro test_div_by_zero signed
+/* Tail-call to divide-by-zero handlers which may be overridden by the user,
+ so unwinding works properly. */
+#if defined(__thumb2__)
+ cbnz yyh, 1f
+ cbnz yyl, 1f
+ cmp xxh, #0
+ do_it eq
+ cmpeq xxl, #0
+ .ifc \signed, unsigned
+ beq 2f
+ mov xxh, #0xffffffff
+ mov xxl, xxh
+2:
+ .else
+ do_it lt, t
+ movlt xxl, #0
+ movlt xxh, #0x80000000
+ do_it gt, t
+ movgt xxh, #0x7fffffff
+ movgt xxl, #0xffffffff
+ .endif
+ b SYM (__aeabi_ldiv0) __PLT__
+1:
+#else
+ /* Note: Thumb-1 code calls via an ARM shim on processors which
+ support ARM mode. */
+ cmp yyh, #0
+ cmpeq yyl, #0
+ bne 2f
+ cmp xxh, #0
+ cmpeq xxl, #0
+ .ifc \signed, unsigned
+ movne xxh, #0xffffffff
+ movne xxl, #0xffffffff
+ .else
+ movlt xxh, #0x80000000
+ movlt xxl, #0
+ movgt xxh, #0x7fffffff
+ movgt xxl, #0xffffffff
+ .endif
+ b SYM (__aeabi_ldiv0) __PLT__
+2:
+#endif
+.endm
+
#ifdef L_aeabi_ldivmod

ARM_FUNC_START aeabi_ldivmod
+ test_div_by_zero signed
+
sub sp, sp, #8
-#if defined(__thumb2__)
+/* Low latency and Thumb-2 do_push implementations can't push sp directly. */
+#if defined(__thumb2__) || defined(__irq_low_latency__)
mov ip, sp
- push {ip, lr}
+ do_push (ip, lr)
#else
- do_push {sp, lr}
+ stmfd sp!, {sp, lr}
#endif
bl SYM(__gnu_ldivmod_helper) __PLT__
ldr lr, [sp, #4]
add sp, sp, #8
- do_pop {r2, r3}
+ do_pop (r2, r3)
RET

#endif /* L_aeabi_ldivmod */
@@ -85,17 +134,20 @@
#ifdef L_aeabi_uldivmod

ARM_FUNC_START aeabi_uldivmod
+ test_div_by_zero unsigned
+
sub sp, sp, #8
-#if defined(__thumb2__)
+/* Low latency and Thumb-2 do_push implementations can't push sp directly. */
+#if defined(__thumb2__) || defined(__irq_low_latency__)
mov ip, sp
- push {ip, lr}
+ do_push (ip, lr)
#else
- do_push {sp, lr}
+ stmfd sp!, {sp, lr}
#endif
bl SYM(__gnu_uldivmod_helper) __PLT__
ldr lr, [sp, #4]
add sp, sp, #8
- do_pop {r2, r3}
+ do_pop (r2, r3)
RET

#endif /* L_aeabi_divmod */
--- a/gcc/config/arm/bpabi-v6m.S
+++ b/gcc/config/arm/bpabi-v6m.S
@@ -69,9 +69,52 @@

#endif /* L_aeabi_ulcmp */

+.macro test_div_by_zero signed
+ cmp yyh, #0
+ bne 7f
+ cmp yyl, #0
+ bne 7f
+ cmp xxh, #0
+ bne 2f
+ cmp xxl, #0
+2:
+ .ifc \signed, unsigned
+ beq 3f
+ mov xxh, #0
+ mvn xxh, xxh @ 0xffffffff
+ mov xxl, xxh
+3:
+ .else
+ beq 5f
+ blt 6f
+ mov xxl, #0
+ mvn xxl, xxl @ 0xffffffff
+ lsr xxh, xxl, #1 @ 0x7fffffff
+ b 5f
+6: mov xxh, #0x80
+ lsl xxh, xxh, #24 @ 0x80000000
+ mov xxl, #0
+5:
+ .endif
+ @ tailcalls are tricky on v6-m.
+ push {r0, r1, r2}
+ ldr r0, 1f
+ adr r1, 1f
+ add r0, r1
+ str r0, [sp, #8]
+ @ We know we are not on armv4t, so pop pc is safe.
+ pop {r0, r1, pc}
+ .align 2
+1:
+ .word __aeabi_ldiv0 - 1b
+7:
+.endm
+
#ifdef L_aeabi_ldivmod

FUNC_START aeabi_ldivmod
+ test_div_by_zero signed
+
push {r0, r1}
mov r0, sp
push {r0, lr}
@@ -89,6 +132,8 @@
#ifdef L_aeabi_uldivmod

FUNC_START aeabi_uldivmod
+ test_div_by_zero unsigned
+
push {r0, r1}
mov r0, sp
push {r0, lr}
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -25,14 +25,15 @@
;; In ARM state, 'l' is an alias for 'r'

;; The following normal constraints have been used:
-;; in ARM/Thumb-2 state: G, H, I, J, K, L, M
+;; in ARM/Thumb-2 state: G, H, I, j, J, K, L, M
;; in Thumb-1 state: I, J, K, L, M, N, O

;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy
+;; in Thumb-1 state: Pa, Pb

;; The following memory constraints have been used:
-;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Us
+;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
;; in ARM state: Uq


@@ -65,6 +66,13 @@
(define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS"
"In Thumb state the core registers @code{r8}-@code{r15}.")

+(define_constraint "j"
+ "A constant suitable for a MOVW instruction. (ARM/Thumb-2)"
+ (and (match_test "TARGET_32BIT && arm_arch_thumb2")
+ (ior (match_code "high")
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff0000) == 0")))))
+
(define_register_constraint "k" "STACK_REG"
"@internal The stack register.")

@@ -116,11 +124,9 @@
: ((ival >= 0 && ival <= 1020) && ((ival & 3) == 0))")))

(define_constraint "N"
- "In ARM/Thumb-2 state a constant suitable for a MOVW instruction.
- In Thumb-1 state a constant in the range 0-31."
+ "Thumb-1 state a constant in the range 0-31."
(and (match_code "const_int")
- (match_test "TARGET_32BIT ? arm_arch_thumb2 && ((ival & 0xffff0000) == 0)
- : (ival >= 0 && ival <= 31)")))
+ (match_test "!TARGET_32BIT && (ival >= 0 && ival <= 31)")))

(define_constraint "O"
"In Thumb-1 state a constant that is a multiple of 4 in the range
@@ -129,6 +135,18 @@
(match_test "TARGET_THUMB1 && ival >= -508 && ival <= 508
&& ((ival & 3) == 0)")))

+(define_constraint "Pa"
+ "@internal In Thumb-1 state a constant in the range -510 to +510"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB1 && ival >= -510 && ival <= 510
+ && (ival > 255 || ival < -255)")))
+
+(define_constraint "Pb"
+ "@internal In Thumb-1 state a constant in the range -262 to +262"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262
+ && (ival > 255 || ival < -255)")))
+
(define_constraint "G"
"In ARM/Thumb-2 state a valid FPA immediate constant."
(and (match_code "const_double")
@@ -189,10 +207,17 @@
(define_constraint "Dv"
"@internal
In ARM/Thumb-2 state a const_double which can be used with a VFP fconsts
- or fconstd instruction."
+ instruction."
(and (match_code "const_double")
(match_test "TARGET_32BIT && vfp3_const_double_rtx (op)")))

+(define_constraint "Dy"
+ "@internal
+ In ARM/Thumb-2 state a const_double which can be used with a VFP fconstd
+ instruction."
+ (and (match_code "const_double")
+ (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)")))
+
(define_memory_constraint "Ut"
"@internal
In ARM/Thumb-2 state an address valid for loading/storing opaque structure
@@ -214,17 +239,24 @@

(define_memory_constraint "Un"
"@internal
+ In ARM/Thumb-2 state a valid address for Neon doubleword vector
+ load/store instructions."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)")))
+
+(define_memory_constraint "Um"
+ "@internal
In ARM/Thumb-2 state a valid address for Neon element and structure
load/store instructions."
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, FALSE)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))

(define_memory_constraint "Us"
"@internal
In ARM/Thumb-2 state a valid address for non-offset loads/stores of
quad-word values in four ARM registers."
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, TRUE)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)")))

(define_memory_constraint "Uq"
"@internal
--- /dev/null
|
||
+++ b/gcc/config/arm/fp16.c
|
||
@@ -0,0 +1,145 @@
|
||
+/* Half-float conversion routines.
|
||
+
|
||
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
|
||
+ Contributed by CodeSourcery.
|
||
+
|
||
+ This file is free software; you can redistribute it and/or modify it
|
||
+ under the terms of the GNU General Public License as published by the
|
||
+ Free Software Foundation; either version 3, or (at your option) any
|
||
+ later version.
|
||
+
|
||
+ This file is distributed in the hope that it will be useful, but
|
||
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ General Public License for more details.
|
||
+
|
||
+ Under Section 7 of GPL version 3, you are granted additional
|
||
+ permissions described in the GCC Runtime Library Exception, version
|
||
+ 3.1, as published by the Free Software Foundation.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License and
|
||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||
+ <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+static inline unsigned short
|
||
+__gnu_f2h_internal(unsigned int a, int ieee)
|
||
+{
|
||
+ unsigned short sign = (a >> 16) & 0x8000;
|
||
+ int aexp = (a >> 23) & 0xff;
|
||
+ unsigned int mantissa = a & 0x007fffff;
|
||
+ unsigned int mask;
|
||
+ unsigned int increment;
|
||
+
|
||
+ if (aexp == 0xff)
|
||
+ {
|
||
+ if (!ieee)
|
||
+ return sign;
|
||
+ return sign | 0x7e00 | (mantissa >> 13);
|
||
+ }
|
||
+
|
||
+ if (aexp == 0 && mantissa == 0)
|
||
+ return sign;
|
||
+
|
||
+ aexp -= 127;
|
||
+
|
||
+ /* Decimal point between bits 22 and 23. */
|
||
+ mantissa |= 0x00800000;
|
||
+ if (aexp < -14)
|
||
+ {
|
||
+ mask = 0x007fffff;
|
||
+ if (aexp < -25)
|
||
+ aexp = -26;
|
||
+ else if (aexp != -25)
|
||
+ mask >>= 24 + aexp;
|
||
+ }
|
||
+ else
|
||
+ mask = 0x00001fff;
|
||
+
|
||
+ /* Round. */
|
||
+ if (mantissa & mask)
|
||
+ {
|
||
+ increment = (mask + 1) >> 1;
|
||
+ if ((mantissa & mask) == increment)
|
||
+ increment = mantissa & (increment << 1);
|
||
+ mantissa += increment;
|
||
+ if (mantissa >= 0x01000000)
|
||
+ {
|
||
+ mantissa >>= 1;
|
||
+ aexp++;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (ieee)
|
||
+ {
|
||
+ if (aexp > 15)
|
||
+ return sign | 0x7c00;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (aexp > 16)
|
||
+ return sign | 0x7fff;
|
||
+ }
|
||
+
|
||
+ if (aexp < -24)
|
||
+ return sign;
|
||
+
|
||
+ if (aexp < -14)
|
||
+ {
|
||
+ mantissa >>= -14 - aexp;
|
||
+ aexp = -14;
|
||
+ }
|
||
+
|
||
+ /* We leave the leading 1 in the mantissa, and subtract one
|
||
+ from the exponent bias to compensate. */
|
||
+ return sign | (((aexp + 14) << 10) + (mantissa >> 13));
|
||
+}
|
||
+
|
||
+unsigned int
|
||
+__gnu_h2f_internal(unsigned short a, int ieee)
|
||
+{
|
||
+ unsigned int sign = (unsigned int)(a & 0x8000) << 16;
|
||
+ int aexp = (a >> 10) & 0x1f;
|
||
+ unsigned int mantissa = a & 0x3ff;
|
||
+
|
||
+ if (aexp == 0x1f && ieee)
|
||
+ return sign | 0x7f800000 | (mantissa << 13);
|
||
+
|
||
+ if (aexp == 0)
|
||
+ {
|
||
+ int shift;
|
||
+
|
||
+ if (mantissa == 0)
|
||
+ return sign;
|
||
+
|
||
+ shift = __builtin_clz(mantissa) - 21;
|
||
+ mantissa <<= shift;
|
||
+ aexp = -shift;
|
||
+ }
|
||
+
|
||
+ return sign | (((aexp + 0x70) << 23) + (mantissa << 13));
|
||
+}
|
||
+
|
||
+unsigned short
|
||
+__gnu_f2h_ieee(unsigned int a)
|
||
+{
|
||
+ return __gnu_f2h_internal(a, 1);
|
||
+}
|
||
+
|
||
+unsigned int
|
||
+__gnu_h2f_ieee(unsigned short a)
|
||
+{
|
||
+ return __gnu_h2f_internal(a, 1);
|
||
+}
|
||
+
|
||
+unsigned short
|
||
+__gnu_f2h_alternative(unsigned int x)
|
||
+{
|
||
+ return __gnu_f2h_internal(x, 0);
|
||
+}
|
||
+
|
||
+unsigned int
|
||
+__gnu_h2f_alternative(unsigned short a)
|
||
+{
|
||
+ return __gnu_h2f_internal(a, 0);
|
||
+}
|
||
--- a/gcc/config/arm/fpa.md
+++ b/gcc/config/arm/fpa.md
@@ -599,10 +599,10 @@
{
default:
case 0: return \"mvf%?e\\t%0, %1\";
- case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ case 1: if (TARGET_FPA_EMU2)
return \"ldf%?e\\t%0, %1\";
return \"lfm%?\\t%0, 1, %1\";
- case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ case 2: if (TARGET_FPA_EMU2)
return \"stf%?e\\t%1, %0\";
return \"sfm%?\\t%1, 1, %0\";
}
--- /dev/null
|
||
+++ b/gcc/config/arm/hwdiv.md
|
||
@@ -0,0 +1,41 @@
|
||
+;; ARM instruction patterns for hardware division
|
||
+;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
|
||
+;; Written by CodeSourcery, LLC.
|
||
+;;
|
||
+;; This file is part of GCC.
|
||
+;;
|
||
+;; GCC is free software; you can redistribute it and/or modify it
|
||
+;; under the terms of the GNU General Public License as published by
|
||
+;; the Free Software Foundation; either version 2, or (at your option)
|
||
+;; any later version.
|
||
+;;
|
||
+;; GCC is distributed in the hope that it will be useful, but
|
||
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+;; General Public License for more details.
|
||
+;;
|
||
+;; You should have received a copy of the GNU General Public License
|
||
+;; along with GCC; see the file COPYING. If not, write to
|
||
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
+;; Boston, MA 02110-1301, USA.
|
||
+
|
||
+(define_insn "divsi3"
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
+ (div:SI (match_operand:SI 1 "s_register_operand" "r")
|
||
+ (match_operand:SI 2 "s_register_operand" "r")))]
|
||
+ "arm_arch_hwdiv"
|
||
+ "sdiv%?\t%0, %1, %2"
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "sdiv")]
|
||
+)
|
||
+
|
||
+(define_insn "udivsi3"
|
||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
+ (udiv:SI (match_operand:SI 1 "s_register_operand" "r")
|
||
+ (match_operand:SI 2 "s_register_operand" "r")))]
|
||
+ "arm_arch_hwdiv"
|
||
+ "udiv%?\t%0, %1, %2"
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "insn" "udiv")]
|
||
+)
|
||
+
|
||
--- a/gcc/config/arm/ieee754-df.S
|
||
+++ b/gcc/config/arm/ieee754-df.S
|
||
@@ -83,7 +83,7 @@
|
||
ARM_FUNC_START adddf3
|
||
ARM_FUNC_ALIAS aeabi_dadd adddf3
|
||
|
||
-1: do_push {r4, r5, lr}
|
||
+1: do_push (r4, r5, lr)
|
||
|
||
@ Look for zeroes, equal values, INF, or NAN.
|
||
shift1 lsl, r4, xh, #1
|
||
@@ -427,7 +427,7 @@
|
||
do_it eq, t
|
||
moveq r1, #0
|
||
RETc(eq)
|
||
- do_push {r4, r5, lr}
|
||
+ do_push (r4, r5, lr)
|
||
mov r4, #0x400 @ initial exponent
|
||
add r4, r4, #(52-1 - 1)
|
||
mov r5, #0 @ sign bit is 0
|
||
@@ -447,7 +447,7 @@
|
||
do_it eq, t
|
||
moveq r1, #0
|
||
RETc(eq)
|
||
- do_push {r4, r5, lr}
|
||
+ do_push (r4, r5, lr)
|
||
mov r4, #0x400 @ initial exponent
|
||
add r4, r4, #(52-1 - 1)
|
||
ands r5, r0, #0x80000000 @ sign bit in r5
|
||
@@ -481,7 +481,7 @@
|
||
RETc(eq) @ we are done already.
|
||
|
||
@ value was denormalized. We can normalize it now.
|
||
- do_push {r4, r5, lr}
|
||
+ do_push (r4, r5, lr)
|
||
mov r4, #0x380 @ setup corresponding exponent
|
||
and r5, xh, #0x80000000 @ move sign bit in r5
|
||
bic xh, xh, #0x80000000
|
||
@@ -508,9 +508,9 @@
|
||
@ compatibility.
|
||
adr ip, LSYM(f0_ret)
|
||
@ Push pc as well so that RETLDM works correctly.
|
||
- do_push {r4, r5, ip, lr, pc}
|
||
+ do_push (r4, r5, ip, lr, pc)
|
||
#else
|
||
- do_push {r4, r5, lr}
|
||
+ do_push (r4, r5, lr)
|
||
#endif
|
||
|
||
mov r5, #0
|
||
@@ -534,9 +534,9 @@
|
||
@ compatibility.
|
||
adr ip, LSYM(f0_ret)
|
||
@ Push pc as well so that RETLDM works correctly.
|
||
- do_push {r4, r5, ip, lr, pc}
|
||
+ do_push (r4, r5, ip, lr, pc)
|
||
#else
|
||
- do_push {r4, r5, lr}
|
||
+ do_push (r4, r5, lr)
|
||
#endif
|
||
|
||
ands r5, ah, #0x80000000 @ sign bit in r5
|
||
@@ -585,7 +585,7 @@
|
||
@ Legacy code expects the result to be returned in f0. Copy it
|
||
@ there as well.
|
||
LSYM(f0_ret):
|
||
- do_push {r0, r1}
|
||
+ do_push (r0, r1)
|
||
ldfd f0, [sp], #8
|
||
RETLDM
|
||
|
||
@@ -602,7 +602,7 @@
|
||
|
||
ARM_FUNC_START muldf3
|
||
ARM_FUNC_ALIAS aeabi_dmul muldf3
|
||
- do_push {r4, r5, r6, lr}
|
||
+ do_push (r4, r5, r6, lr)
|
||
|
||
@ Mask out exponents, trap any zero/denormal/INF/NAN.
|
||
mov ip, #0xff
|
||
@@ -910,7 +910,7 @@
|
||
ARM_FUNC_START divdf3
|
||
ARM_FUNC_ALIAS aeabi_ddiv divdf3
|
||
|
||
- do_push {r4, r5, r6, lr}
|
||
+ do_push (r4, r5, r6, lr)
|
||
|
||
@ Mask out exponents, trap any zero/denormal/INF/NAN.
|
||
mov ip, #0xff
|
||
@@ -1117,7 +1117,7 @@
|
||
ARM_FUNC_ALIAS eqdf2 cmpdf2
|
||
mov ip, #1 @ how should we specify unordered here?
|
||
|
||
-1: str ip, [sp, #-4]
|
||
+1: str ip, [sp, #-4]!
|
||
|
||
@ Trap any INF/NAN first.
|
||
mov ip, xh, lsl #1
|
||
@@ -1129,7 +1129,8 @@
|
||
|
||
@ Test for equality.
|
||
@ Note that 0.0 is equal to -0.0.
|
||
-2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
|
||
+2: add sp, sp, #4
|
||
+ orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
|
||
do_it eq, e
|
||
COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
|
||
teqne xh, yh @ or xh == yh
|
||
@@ -1168,7 +1169,7 @@
|
||
bne 2b
|
||
orrs ip, yl, yh, lsl #12
|
||
beq 2b @ y is not NAN
|
||
-5: ldr r0, [sp, #-4] @ unordered return code
|
||
+5: ldr r0, [sp], #4 @ unordered return code
|
||
RET
|
||
|
||
FUNC_END gedf2
|
||
@@ -1194,7 +1195,7 @@
|
||
|
||
@ The status-returning routines are required to preserve all
|
||
@ registers except ip, lr, and cpsr.
|
||
-6: do_push {r0, lr}
|
||
+6: do_push (r0, lr)
|
||
ARM_CALL cmpdf2
|
||
@ Set the Z flag correctly, and the C flag unconditionally.
|
||
cmp r0, #0
|
||
--- a/gcc/config/arm/ieee754-sf.S
|
||
+++ b/gcc/config/arm/ieee754-sf.S
|
||
@@ -481,7 +481,7 @@
|
||
and r3, ip, #0x80000000
|
||
|
||
@ Well, no way to make it shorter without the umull instruction.
|
||
- do_push {r3, r4, r5}
|
||
+ do_push (r3, r4, r5)
|
||
mov r4, r0, lsr #16
|
||
mov r5, r1, lsr #16
|
||
bic r0, r0, r4, lsl #16
|
||
@@ -492,7 +492,7 @@
|
||
mla r0, r4, r1, r0
|
||
adds r3, r3, r0, lsl #16
|
||
adc r1, ip, r0, lsr #16
|
||
- do_pop {r0, r4, r5}
|
||
+ do_pop (r0, r4, r5)
|
||
|
||
#else
|
||
|
||
@@ -822,7 +822,7 @@
|
||
ARM_FUNC_ALIAS eqsf2 cmpsf2
|
||
mov ip, #1 @ how should we specify unordered here?
|
||
|
||
-1: str ip, [sp, #-4]
|
||
+1: str ip, [sp, #-4]!
|
||
|
||
@ Trap any INF/NAN first.
|
||
mov r2, r0, lsl #1
|
||
@@ -834,7 +834,8 @@
|
||
|
||
@ Compare values.
|
||
@ Note that 0.0 is equal to -0.0.
|
||
-2: orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
|
||
+2: add sp, sp, #4
|
||
+ orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
|
||
do_it ne
|
||
teqne r0, r1 @ if not 0 compare sign
|
||
do_it pl
|
||
@@ -858,7 +859,7 @@
|
||
bne 2b
|
||
movs ip, r1, lsl #9
|
||
beq 2b @ r1 is not NAN
|
||
-5: ldr r0, [sp, #-4] @ return unordered code.
|
||
+5: ldr r0, [sp], #4 @ return unordered code.
|
||
RET
|
||
|
||
FUNC_END gesf2
|
||
@@ -881,7 +882,7 @@
|
||
|
||
@ The status-returning routines are required to preserve all
|
||
@ registers except ip, lr, and cpsr.
|
||
-6: do_push {r0, r1, r2, r3, lr}
|
||
+6: do_push (r0, r1, r2, r3, lr)
|
||
ARM_CALL cmpsf2
|
||
@ Set the Z flag correctly, and the C flag unconditionally.
|
||
cmp r0, #0
|
||
--- a/gcc/config/arm/lib1funcs.asm
|
||
+++ b/gcc/config/arm/lib1funcs.asm
|
||
@@ -27,8 +27,17 @@
|
||
#if defined(__ELF__) && defined(__linux__)
|
||
.section .note.GNU-stack,"",%progbits
|
||
.previous
|
||
-#endif
|
||
+#endif /* __ELF__ and __linux__ */
|
||
|
||
+#ifdef __ARM_EABI__
|
||
+/* Some attributes that are common to all routines in this file. */
|
||
+ /* Tag_ABI_align8_needed: This code does not require 8-byte
|
||
+ alignment from the caller. */
|
||
+ /* .eabi_attribute 24, 0 -- default setting. */
|
||
+ /* Tag_ABI_align8_preserved: This code preserves 8-byte
|
||
+ alignment in any callee. */
|
||
+ .eabi_attribute 25, 1
|
||
+#endif /* __ARM_EABI__ */
|
||
/* ------------------------------------------------------------------------ */
|
||
|
||
/* We need to know what prefix to add to function names. */
|
||
@@ -233,8 +242,8 @@
|
||
.macro shift1 op, arg0, arg1, arg2
|
||
\op \arg0, \arg1, \arg2
|
||
.endm
|
||
-#define do_push push
|
||
-#define do_pop pop
|
||
+#define do_push(...) push {__VA_ARGS__}
|
||
+#define do_pop(...) pop {__VA_ARGS__}
|
||
#define COND(op1, op2, cond) op1 ## op2 ## cond
|
||
/* Perform an arithmetic operation with a variable shift operand. This
|
||
requires two instructions and a scratch register on Thumb-2. */
|
||
@@ -248,24 +257,133 @@
|
||
.macro shift1 op, arg0, arg1, arg2
|
||
mov \arg0, \arg1, \op \arg2
|
||
.endm
|
||
-#define do_push stmfd sp!,
|
||
-#define do_pop ldmfd sp!,
|
||
+#if defined(__low_irq_latency__)
|
||
+#define do_push(...) \
|
||
+ _buildN1(do_push, _buildC1(__VA_ARGS__))( __VA_ARGS__)
|
||
+#define _buildN1(BASE, X) _buildN2(BASE, X)
|
||
+#define _buildN2(BASE, X) BASE##X
|
||
+#define _buildC1(...) _buildC2(__VA_ARGS__,9,8,7,6,5,4,3,2,1)
|
||
+#define _buildC2(a1,a2,a3,a4,a5,a6,a7,a8,a9,c,...) c
|
||
+
|
||
+#define do_push1(r1) str r1, [sp, #-4]!
|
||
+#define do_push2(r1, r2) str r2, [sp, #-4]! ; str r1, [sp, #-4]!
|
||
+#define do_push3(r1, r2, r3) str r3, [sp, #-4]! ; str r2, [sp, #-4]!; str r1, [sp, #-4]!
|
||
+#define do_push4(r1, r2, r3, r4) \
|
||
+ do_push3 (r2, r3, r4);\
|
||
+ do_push1 (r1)
|
||
+#define do_push5(r1, r2, r3, r4, r5) \
|
||
+ do_push4 (r2, r3, r4, r5);\
|
||
+ do_push1 (r1)
|
||
+
|
||
+#define do_pop(...) \
|
||
+_buildN1(do_pop, _buildC1(__VA_ARGS__))( __VA_ARGS__)
|
||
+
|
||
+#define do_pop1(r1) ldr r1, [sp], #4
|
||
+#define do_pop2(r1, r2) ldr r1, [sp], #4 ; ldr r2, [sp], #4
|
||
+#define do_pop3(r1, r2, r3) ldr r1, [sp], #4 ; ldr r2, [sp], #4; ldr r3, [sp], #4
+#define do_pop4(r1, r2, r3, r4) \
+ do_pop1 (r1);\
+ do_pop3 (r2, r3, r4)
|
||
+#define do_pop5(r1, r2, r3, r4, r5) \
|
||
+ do_pop1 (r1);\
|
||
+ do_pop4 (r2, r3, r4, r5)
|
||
+#else
|
||
+#define do_push(...) stmfd sp!, { __VA_ARGS__}
|
||
+#define do_pop(...) ldmfd sp!, {__VA_ARGS__}
|
||
+#endif
|
||
+
|
||
+
|
||
#define COND(op1, op2, cond) op1 ## cond ## op2
|
||
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
|
||
\name \dest, \src1, \src2, \shiftop \shiftreg
|
||
.endm
|
||
#endif
|
||
|
||
-.macro ARM_LDIV0 name
|
||
+#ifdef __ARM_EABI__
|
||
+.macro ARM_LDIV0 name signed
|
||
+ cmp r0, #0
|
||
+ .ifc \signed, unsigned
|
||
+ movne r0, #0xffffffff
|
||
+ .else
|
||
+ movgt r0, #0x7fffffff
|
||
+ movlt r0, #0x80000000
|
||
+ .endif
|
||
+ b SYM (__aeabi_idiv0) __PLT__
|
||
+.endm
|
||
+#else
|
||
+.macro ARM_LDIV0 name signed
|
||
str lr, [sp, #-8]!
|
||
98: cfi_push 98b - __\name, 0xe, -0x8, 0x8
|
||
bl SYM (__div0) __PLT__
|
||
mov r0, #0 @ About as wrong as it could be.
|
||
RETLDM unwind=98b
|
||
.endm
|
||
+#endif
|
||
|
||
|
||
-.macro THUMB_LDIV0 name
|
||
+#ifdef __ARM_EABI__
|
||
+.macro THUMB_LDIV0 name signed
|
||
+#if defined(__ARM_ARCH_6M__)
|
||
+ .ifc \signed, unsigned
|
||
+ cmp r0, #0
|
||
+ beq 1f
|
||
+ mov r0, #0
|
||
+ mvn r0, r0 @ 0xffffffff
|
||
+1:
|
||
+ .else
|
||
+ cmp r0, #0
|
||
+ beq 2f
|
||
+ blt 3f
|
||
+ mov r0, #0
|
||
+ mvn r0, r0
|
||
+ lsr r0, r0, #1 @ 0x7fffffff
|
||
+ b 2f
|
||
+3: mov r0, #0x80
|
||
+ lsl r0, r0, #24 @ 0x80000000
|
||
+2:
|
||
+ .endif
|
||
+ push {r0, r1, r2}
|
||
+ ldr r0, 4f
|
||
+ adr r1, 4f
|
||
+ add r0, r1
|
||
+ str r0, [sp, #8]
|
||
+ @ We know we are not on armv4t, so pop pc is safe.
|
||
+ pop {r0, r1, pc}
|
||
+ .align 2
|
||
+4:
|
||
+ .word __aeabi_idiv0 - 4b
|
||
+#elif defined(__thumb2__)
|
||
+ .syntax unified
|
||
+ .ifc \signed, unsigned
|
||
+ cbz r0, 1f
|
||
+ mov r0, #0xffffffff
|
||
+1:
|
||
+ .else
|
||
+ cmp r0, #0
|
||
+ do_it gt
|
||
+ movgt r0, #0x7fffffff
|
||
+ do_it lt
|
||
+ movlt r0, #0x80000000
|
||
+ .endif
|
||
+ b.w SYM(__aeabi_idiv0) __PLT__
|
||
+#else
|
||
+ .align 2
|
||
+ bx pc
|
||
+ nop
|
||
+ .arm
|
||
+ cmp r0, #0
|
||
+ .ifc \signed, unsigned
|
||
+ movne r0, #0xffffffff
|
||
+ .else
|
||
+ movgt r0, #0x7fffffff
|
||
+ movlt r0, #0x80000000
|
||
+ .endif
|
||
+ b SYM(__aeabi_idiv0) __PLT__
|
||
+ .thumb
|
||
+#endif
|
||
+.endm
|
||
+#else
|
||
+.macro THUMB_LDIV0 name signed
|
||
push { r1, lr }
|
||
98: cfi_push 98b - __\name, 0xe, -0x4, 0x8
|
||
bl SYM (__div0)
|
||
@@ -277,18 +395,19 @@
|
||
pop { r1, pc }
|
||
#endif
|
||
.endm
|
||
+#endif
|
||
|
||
.macro FUNC_END name
|
||
SIZE (__\name)
|
||
.endm
|
||
|
||
-.macro DIV_FUNC_END name
|
||
+.macro DIV_FUNC_END name signed
|
||
cfi_start __\name, LSYM(Lend_div0)
|
||
LSYM(Ldiv0):
|
||
#ifdef __thumb__
|
||
- THUMB_LDIV0 \name
|
||
+ THUMB_LDIV0 \name \signed
|
||
#else
|
||
- ARM_LDIV0 \name
|
||
+ ARM_LDIV0 \name \signed
|
||
#endif
|
||
cfi_end LSYM(Lend_div0)
|
||
FUNC_END \name
|
||
@@ -413,6 +532,12 @@
|
||
#define yyl r2
|
||
#endif
|
||
|
||
+#ifdef __ARM_EABI__
|
||
+.macro WEAK name
|
||
+ .weak SYM (__\name)
|
||
+.endm
|
||
+#endif
|
||
+
|
||
#ifdef __thumb__
|
||
/* Register aliases. */
|
||
|
||
@@ -437,6 +562,43 @@
|
||
|
||
#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
|
||
|
||
+#if defined (__thumb2__)
|
||
+ clz \curbit, \dividend
|
||
+ clz \result, \divisor
|
||
+ sub \curbit, \result, \curbit
|
||
+ rsb \curbit, \curbit, #31
|
||
+ adr \result, 1f
|
||
+ add \curbit, \result, \curbit, lsl #4
|
||
+ mov \result, #0
|
||
+ mov pc, \curbit
|
||
+.p2align 3
|
||
+1:
|
||
+ .set shift, 32
|
||
+ .rept 32
|
||
+ .set shift, shift - 1
|
||
+ cmp.w \dividend, \divisor, lsl #shift
|
||
+ nop.n
|
||
+ adc.w \result, \result, \result
|
||
+ it cs
|
||
+ subcs.w \dividend, \dividend, \divisor, lsl #shift
|
||
+ .endr
|
||
+#elif defined(__ARM_TUNE_MARVELL_F__)
|
||
+ clz \curbit, \dividend
|
||
+ clz \result, \divisor
|
||
+ sub \curbit, \result, \curbit
|
||
+ mov \divisor, \divisor, lsl \curbit
|
||
+ rsb \curbit, \curbit, #31
|
||
+ mov \curbit, \curbit, lsl #2
|
||
+ mov \result, #0
|
||
+ add pc, pc, \curbit, lsl #2
|
||
+ nop
|
||
+ .rept 32
|
||
+ cmp \dividend, \divisor
|
||
+ subcs \dividend, \dividend, \divisor
|
||
+ mov \divisor, \divisor, lsr #1
|
||
+ adc \result, \result, \result
|
||
+ .endr
|
||
+#else /* ! defined(__ARM_TUNE_MARVELL_F__) */
|
||
clz \curbit, \dividend
|
||
clz \result, \divisor
|
||
sub \curbit, \result, \curbit
|
||
@@ -452,6 +614,7 @@
|
||
adc \result, \result, \result
|
||
subcs \dividend, \dividend, \divisor, lsl #shift
|
||
.endr
|
||
+#endif /* defined(__ARM_TUNE_MARVELL_F__) */
|
||
|
||
#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
|
||
#if __ARM_ARCH__ >= 5
|
||
@@ -499,18 +662,23 @@
|
||
|
||
@ Division loop
|
||
1: cmp \dividend, \divisor
|
||
+ do_it hs, t
|
||
subhs \dividend, \dividend, \divisor
|
||
orrhs \result, \result, \curbit
|
||
cmp \dividend, \divisor, lsr #1
|
||
+ do_it hs, t
|
||
subhs \dividend, \dividend, \divisor, lsr #1
|
||
orrhs \result, \result, \curbit, lsr #1
|
||
cmp \dividend, \divisor, lsr #2
|
||
+ do_it hs, t
|
||
subhs \dividend, \dividend, \divisor, lsr #2
|
||
orrhs \result, \result, \curbit, lsr #2
|
||
cmp \dividend, \divisor, lsr #3
|
||
+ do_it hs, t
|
||
subhs \dividend, \dividend, \divisor, lsr #3
|
||
orrhs \result, \result, \curbit, lsr #3
|
||
cmp \dividend, #0 @ Early termination?
|
||
+ do_it ne, t
|
||
movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
|
||
movne \divisor, \divisor, lsr #4
|
||
bne 1b
|
||
@@ -799,13 +967,14 @@
|
||
/* ------------------------------------------------------------------------ */
|
||
#ifdef L_udivsi3
|
||
|
||
+#if defined(__ARM_ARCH_6M__)
|
||
+
|
||
FUNC_START udivsi3
|
||
FUNC_ALIAS aeabi_uidiv udivsi3
|
||
|
||
-#ifdef __thumb__
|
||
-
|
||
cmp divisor, #0
|
||
beq LSYM(Ldiv0)
|
||
+LSYM(udivsi3_nodiv0):
|
||
mov curbit, #1
|
||
mov result, #0
|
||
|
||
@@ -819,9 +988,16 @@
|
||
pop { work }
|
||
RET
|
||
|
||
-#else /* ARM version. */
|
||
+#else /* ARM/Thumb-2 version. */
|
||
+
|
||
+ ARM_FUNC_START udivsi3
|
||
+ ARM_FUNC_ALIAS aeabi_uidiv udivsi3
|
||
|
||
+ /* Note: if called via udivsi3_nodiv0, this will unnecessarily check
|
||
+ for division-by-zero a second time. */
|
||
+LSYM(udivsi3_nodiv0):
|
||
subs r2, r1, #1
|
||
+ do_it eq
|
||
RETc(eq)
|
||
bcc LSYM(Ldiv0)
|
||
cmp r0, r1
|
||
@@ -834,7 +1010,8 @@
|
||
mov r0, r2
|
||
RET
|
||
|
||
-11: moveq r0, #1
|
||
+11: do_it eq, e
|
||
+ moveq r0, #1
|
||
movne r0, #0
|
||
RET
|
||
|
||
@@ -845,19 +1022,24 @@
|
||
|
||
#endif /* ARM version */
|
||
|
||
- DIV_FUNC_END udivsi3
|
||
+ DIV_FUNC_END udivsi3 unsigned
|
||
|
||
+#if defined(__ARM_ARCH_6M__)
|
||
FUNC_START aeabi_uidivmod
|
||
-#ifdef __thumb__
|
||
+ cmp r1, #0
|
||
+ beq LSYM(Ldiv0)
|
||
push {r0, r1, lr}
|
||
- bl SYM(__udivsi3)
|
||
+ bl LSYM(udivsi3_nodiv0)
|
||
POP {r1, r2, r3}
|
||
mul r2, r0
|
||
sub r1, r1, r2
|
||
bx r3
|
||
#else
|
||
+ARM_FUNC_START aeabi_uidivmod
|
||
+ cmp r1, #0
|
||
+ beq LSYM(Ldiv0)
|
||
stmfd sp!, { r0, r1, lr }
|
||
- bl SYM(__udivsi3)
|
||
+ bl LSYM(udivsi3_nodiv0)
|
||
ldmfd sp!, { r1, r2, lr }
|
||
mul r3, r2, r0
|
||
sub r1, r1, r3
|
||
@@ -904,19 +1086,20 @@
|
||
|
||
#endif /* ARM version. */
|
||
|
||
- DIV_FUNC_END umodsi3
|
||
+ DIV_FUNC_END umodsi3 unsigned
|
||
|
||
#endif /* L_umodsi3 */
|
||
/* ------------------------------------------------------------------------ */
|
||
#ifdef L_divsi3
|
||
|
||
+#if defined(__ARM_ARCH_6M__)
|
||
+
|
||
FUNC_START divsi3
|
||
FUNC_ALIAS aeabi_idiv divsi3
|
||
|
||
-#ifdef __thumb__
|
||
cmp divisor, #0
|
||
beq LSYM(Ldiv0)
|
||
-
|
||
+LSYM(divsi3_nodiv0):
|
||
push { work }
|
||
mov work, dividend
|
||
eor work, divisor @ Save the sign of the result.
|
||
@@ -945,15 +1128,21 @@
|
||
pop { work }
|
||
RET
|
||
|
||
-#else /* ARM version. */
|
||
+#else /* ARM/Thumb-2 version. */
|
||
|
||
+ ARM_FUNC_START divsi3
|
||
+ ARM_FUNC_ALIAS aeabi_idiv divsi3
|
||
+
|
||
cmp r1, #0
|
||
- eor ip, r0, r1 @ save the sign of the result.
|
||
beq LSYM(Ldiv0)
|
||
+LSYM(divsi3_nodiv0):
|
||
+ eor ip, r0, r1 @ save the sign of the result.
|
||
+ do_it mi
|
||
rsbmi r1, r1, #0 @ loops below use unsigned.
|
||
subs r2, r1, #1 @ division by 1 or -1 ?
|
||
beq 10f
|
||
movs r3, r0
|
||
+ do_it mi
|
||
rsbmi r3, r0, #0 @ positive dividend value
|
||
cmp r3, r1
|
||
bls 11f
|
||
@@ -963,14 +1152,18 @@
|
||
ARM_DIV_BODY r3, r1, r0, r2
|
||
|
||
cmp ip, #0
|
||
+ do_it mi
|
||
rsbmi r0, r0, #0
|
||
RET
|
||
|
||
10: teq ip, r0 @ same sign ?
|
||
+ do_it mi
|
||
rsbmi r0, r0, #0
|
||
RET
|
||
|
||
-11: movlo r0, #0
|
||
+11: do_it lo
|
||
+ movlo r0, #0
|
||
+ do_it eq,t
|
||
moveq r0, ip, asr #31
|
||
orreq r0, r0, #1
|
||
RET
|
||
@@ -979,24 +1172,30 @@
|
||
|
||
cmp ip, #0
|
||
mov r0, r3, lsr r2
|
||
+ do_it mi
|
||
rsbmi r0, r0, #0
|
||
RET
|
||
|
||
#endif /* ARM version */
|
||
|
||
- DIV_FUNC_END divsi3
|
||
+ DIV_FUNC_END divsi3 signed
|
||
|
||
+#if defined(__ARM_ARCH_6M__)
|
||
FUNC_START aeabi_idivmod
|
||
-#ifdef __thumb__
|
||
+ cmp r1, #0
|
||
+ beq LSYM(Ldiv0)
|
||
push {r0, r1, lr}
|
||
- bl SYM(__divsi3)
|
||
+ bl LSYM(divsi3_nodiv0)
|
||
POP {r1, r2, r3}
|
||
mul r2, r0
|
||
sub r1, r1, r2
|
||
bx r3
|
||
#else
|
||
+ARM_FUNC_START aeabi_idivmod
|
||
+ cmp r1, #0
|
||
+ beq LSYM(Ldiv0)
|
||
stmfd sp!, { r0, r1, lr }
|
||
- bl SYM(__divsi3)
|
||
+ bl LSYM(divsi3_nodiv0)
|
||
ldmfd sp!, { r1, r2, lr }
|
||
mul r3, r2, r0
|
||
sub r1, r1, r3
|
||
@@ -1062,21 +1261,25 @@
|
||
|
||
#endif /* ARM version */
|
||
|
||
- DIV_FUNC_END modsi3
|
||
+ DIV_FUNC_END modsi3 signed
|
||
|
||
#endif /* L_modsi3 */
|
||
/* ------------------------------------------------------------------------ */
|
||
#ifdef L_dvmd_tls
|
||
|
||
- FUNC_START div0
|
||
- FUNC_ALIAS aeabi_idiv0 div0
|
||
- FUNC_ALIAS aeabi_ldiv0 div0
|
||
-
|
||
+#ifdef __ARM_EABI__
|
||
+ WEAK aeabi_idiv0
|
||
+ WEAK aeabi_ldiv0
|
||
+ FUNC_START aeabi_idiv0
|
||
+ FUNC_START aeabi_ldiv0
|
||
RET
|
||
-
|
||
FUNC_END aeabi_ldiv0
|
||
FUNC_END aeabi_idiv0
|
||
+#else
|
||
+ FUNC_START div0
|
||
+ RET
|
||
FUNC_END div0
|
||
+#endif
|
||
|
||
#endif /* L_divmodsi_tools */
|
||
/* ------------------------------------------------------------------------ */
|
||
@@ -1086,16 +1289,49 @@
|
||
/* Constant taken from <asm/signal.h>. */
|
||
#define SIGFPE 8
|
||
|
||
+#ifdef __ARM_EABI__
|
||
+ WEAK aeabi_idiv0
|
||
+ WEAK aeabi_ldiv0
|
||
+ ARM_FUNC_START aeabi_idiv0
|
||
+ ARM_FUNC_START aeabi_ldiv0
|
||
+#else
|
||
ARM_FUNC_START div0
|
||
+#endif
|
||
|
||
- do_push {r1, lr}
|
||
+ do_push (r1, lr)
|
||
mov r0, #SIGFPE
|
||
bl SYM(raise) __PLT__
|
||
RETLDM r1
|
||
|
||
+#ifdef __ARM_EABI__
|
||
+ FUNC_END aeabi_ldiv0
|
||
+ FUNC_END aeabi_idiv0
|
||
+#else
|
||
FUNC_END div0
|
||
+#endif
|
||
|
||
#endif /* L_dvmd_lnx */
|
||
+#ifdef L_clear_cache
|
||
+#if defined __ARM_EABI__ && defined __linux__
|
||
+@ EABI GNU/Linux call to cacheflush syscall.
|
||
+ ARM_FUNC_START clear_cache
|
||
+ do_push (r7)
|
||
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
|
||
+ movw r7, #2
|
||
+ movt r7, #0xf
|
||
+#else
|
||
+ mov r7, #0xf0000
|
||
+ add r7, r7, #2
|
||
+#endif
|
||
+ mov r2, #0
|
||
+ swi 0
|
||
+ do_pop (r7)
|
||
+ RET
|
||
+ FUNC_END clear_cache
|
||
+#else
|
||
+#error "This is only for ARM EABI GNU/Linux"
|
||
+#endif
|
||
+#endif /* L_clear_cache */
|
||
/* ------------------------------------------------------------------------ */
|
||
/* Dword shift operations. */
|
||
/* All the following Dword shift variants rely on the fact that
|
||
@@ -1292,7 +1528,7 @@
|
||
push {r4, lr}
|
||
# else
|
||
ARM_FUNC_START clzdi2
|
||
- do_push {r4, lr}
|
||
+ do_push (r4, lr)
|
||
# endif
|
||
cmp xxh, #0
|
||
bne 1f
|
||
--- a/gcc/config/arm/linux-eabi.h
+++ b/gcc/config/arm/linux-eabi.h
@@ -66,22 +66,14 @@
/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to
use the GNU/Linux version, not the generic BPABI version. */
#undef LINK_SPEC
-#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC BE8_LINK_SPEC

/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we
do not use -lfloat. */
#undef LIBGCC_SPEC

-/* Clear the instruction cache from `beg' to `end'. This makes an
- inline system call to SYS_cacheflush. */
+/* Clear the instruction cache from `beg' to `end'. This is
+ implemented in lib1funcs.asm, so ensure an error if this definition
+ is used. */
#undef CLEAR_INSN_CACHE
-#define CLEAR_INSN_CACHE(BEG, END) \
-{ \
- register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \
- register unsigned long _end __asm ("a2") = (unsigned long) (END); \
- register unsigned long _flg __asm ("a3") = 0; \
- register unsigned long _scno __asm ("r7") = 0xf0002; \
- __asm __volatile ("swi 0 @ sys_cacheflush" \
- : "=r" (_beg) \
- : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); \
-}
+#define CLEAR_INSN_CACHE(BEG, END) not used
--- a/gcc/config/arm/linux-elf.h
+++ b/gcc/config/arm/linux-elf.h
@@ -98,7 +98,7 @@

/* NWFPE always understands FPA instructions. */
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3
+#define FPUTYPE_DEFAULT "fpe3"

/* Call the function profiler with a given profile label. */
#undef ARM_FUNCTION_PROFILER
--- /dev/null
|
||
+++ b/gcc/config/arm/marvell-f.md
|
||
@@ -0,0 +1,365 @@
|
||
+;; Marvell 2850 pipeline description
|
||
+;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
|
||
+;; Written by Marvell and CodeSourcery, Inc.
|
||
+
|
||
+;; This file is part of GCC.
|
||
+
|
||
+;; GCC is free software; you can redistribute it and/or modify it
|
||
+;; under the terms of the GNU General Public License as published
|
||
+;; by the Free Software Foundation; either version 2, or (at your
|
||
+;; option) any later version.
|
||
+
|
||
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||
+;; License for more details.
|
||
+
|
||
+;; You should have received a copy of the GNU General Public License
|
||
+;; along with GCC; see the file COPYING. If not, write to
|
||
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
+;; Boston, MA 02110-1301, USA.
|
||
+
|
||
+;; This automaton provides a pipeline description for the Marvell
|
||
+;; 2850 core.
|
||
+;;
|
||
+;; The model given here assumes that the condition for all conditional
|
||
+;; instructions is "true", i.e., that all of the instructions are
|
||
+;; actually executed.
|
||
+
|
||
+(define_automaton "marvell_f")
|
||
+
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+;; Pipelines
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+
|
||
+;; This is a dual-issue processor with three pipelines:
|
||
+;;
|
||
+;; 1. Arithmetic and load/store pipeline A1.
|
||
+;; Issue | E1 | E2 | OF | WR | WB for load-store instructions
|
||
+;; Issue | E1 | E2 | WB for arithmetic instructions
|
||
+;;
|
||
+;; 2. Arithmetic pipeline A2.
|
||
+;; Issue | E1 | E2 | WB
|
||
+;;
|
||
+;; 3. Multiply and multiply-accumulate pipeline.
|
||
+;; Issue | MAC1 | MAC2 | MAC3 | WB
|
||
+;;
|
||
+;; There are various bypasses modelled to a greater or lesser extent.
|
||
+;;
|
||
+;; Latencies in this file correspond to the number of cycles after
|
||
+;; the issue stage that it takes for the result of the instruction to
|
||
+;; be computed, or for its side-effects to occur.
|
||
+
|
||
+(define_cpu_unit "a1_e1,a1_e2,a1_of,a1_wr,a1_wb" "marvell_f") ; ALU 1
|
||
+(define_cpu_unit "a2_e1,a2_e2,a2_wb" "marvell_f") ; ALU 2
|
||
+(define_cpu_unit "m_1,m_2,m_3,m_wb" "marvell_f") ; MAC
|
||
+
|
||
+;; We define an SRAM cpu unit to enable us to describe conflicts
|
||
+;; between loads at the E2 stage and stores at the WR stage.
|
||
+
|
||
+(define_cpu_unit "sram" "marvell_f")
|
||
+
|
||
+;; Handling of dual-issue constraints.
|
||
+;;
|
||
+;; Certain pairs of instructions can be issued in parallel, and certain
|
||
+;; pairs cannot. We divide a subset of the instructions into groups as
|
||
+;; follows.
|
||
+;;
|
||
+;; - data processing 1 (mov, mvn);
|
||
+;; - data processing 2 (adc, add, and, bic, cmn, cmp, eor, orr, rsb,
|
||
+;; rsc, sbc, sub, teq, tst);
|
||
+;; - load single (ldr, ldrb, ldrbt, ldrt, ldrh, ldrsb, ldrsh);
|
||
+;; - store single (str, strb, strbt, strt, strh);
|
||
+;; - swap (swp, swpb);
|
||
+;; - pld;
|
||
+;; - count leading zeros and DSP add/sub (clz, qadd, qdadd, qsub, qdsub);
|
||
+;; - multiply 2 (mul, muls, smull, umull, smulxy, smulls, umulls);
|
||
+;; - multiply 3 (mla, mlas, smlal, umlal, smlaxy, smlalxy, smlawx,
|
||
+;; smlawy, smlals, umlals);
|
||
+;; - branches (b, bl, blx, bx).
|
||
+;;
|
||
+;; Ignoring conditional execution, it is a good approximation to the core
|
||
+;; to model that two instructions may only be issued in parallel if the
|
||
+;; following conditions are met.
|
||
+;; I. The instructions both fall into one of the above groups and their
|
||
+;; corresponding groups have a entry in the matrix below that is not X.
|
||
+;; II. The second instruction does not read any register updated by the
|
||
+;; first instruction (already enforced by the GCC scheduler).
|
||
+;; III. The second instruction does not need the carry flag updated by the
|
||
+;; first instruction. Currently we do not model this.
|
||
+;;
|
||
+;; First Second instruction group
|
||
+;; insn
|
||
+;; DP1 DP2 L S SWP PLD CLZ M2 M3 B
|
||
+;;
|
||
+;; DP1 ok ok ok ok ok ok ok ok ok ok
|
||
+;; DP2(1) ok ok ok ok ok ok ok ok ok ok
|
||
+;; DP2(2) ok (2) ok (4) ok ok ok ok X ok
|
||
+;; L }
|
||
+;; SWP } ok ok X X X X ok ok ok ok
|
||
+;; PLD }
|
||
+;; S(3) ok ok X X X X ok ok ok ok
|
||
+;; S(4) ok (2) X X X X ok ok X ok
|
||
+;; CLZ ok ok ok ok ok ok ok ok ok ok
|
||
+;; M2 ok ok ok ok ok ok ok X X ok
|
||
+;; M3 ok (2) ok (4) ok ok ok X X ok
|
||
+;; B ok ok ok ok ok ok ok ok ok ok
|
||
+;;
|
||
+;; (1) without register shift
|
||
+;; (2) with register shift
|
||
+;; (3) with immediate offset
|
||
+;; (4) with register offset
|
||
+;;
|
||
+;; We define a fake cpu unit "reg_shift_lock" to enforce constraints
|
||
+;; between instructions in groups DP2(2) and M3. All other
|
||
+;; constraints are enforced automatically by virtue of the limited
|
||
+;; number of pipelines available for the various operations, with
|
||
+;; the exception of constraints involving S(4) that we do not model.
|
||
+
|
||
+(define_cpu_unit "reg_shift_lock" "marvell_f")
|
||
+
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+;; ALU instructions
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+
|
||
+;; 1. Certain logic operations can be retired after the E1 stage if
|
||
+;; the pipeline is not already retiring another instruction. In this
|
||
+;; model we assume this behaviour always holds for mov, mvn, and, orr, eor
|
||
+;; instructions. If a register shift is involved and the instruction is
|
||
+;; not mov or mvn, then a dual-issue constraint must be enforced.
|
||
+
|
||
+;; The first two cases are separate so they can be identified for
|
||
+;; bypasses below.
|
||
+
|
||
+(define_insn_reservation "marvell_f_alu_early_retire" 1
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (eq_attr "insn" "mov,mvn,and,orr,eor")))
|
||
+ "(a1_e1,a1_wb)|(a2_e1,a2_wb)")
|
||
+
|
||
+(define_insn_reservation "marvell_f_alu_early_retire_shift" 1
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "insn" "mov,mvn,and,orr,eor")))
|
||
+ "(a1_e1,a1_wb)|(a2_e1,a2_wb)")
|
||
+
|
||
+(define_insn_reservation "marvell_f_alu_early_retire_reg_shift1" 1
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "insn" "mov,mvn")))
|
||
+ "(a1_e1,a1_wb)|(a2_e1,a2_wb)")
|
||
+
|
||
+(define_insn_reservation "marvell_f_alu_early_retire_reg_shift2" 1
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (and (eq_attr "type" "alu_shift_reg")
|
||
+ (eq_attr "insn" "and,orr,eor")))
|
||
+ "(reg_shift_lock+a1_e1,a1_wb)|(reg_shift_lock+a2_e1,a2_wb)")
|
||
+
|
||
+;; 2. ALU operations with no shifted operand. These bypass the E1 stage if
|
||
+;; the E2 stage of the corresponding pipeline is clear; here, we always
|
||
+;; model this scenario [*]. We give the operation a latency of 1 yet reserve
|
||
+;; both E1 and E2 for it (thus preventing the GCC scheduler, in the case
|
||
+;; where both E1 and E2 of one pipeline are clear, from issuing one
|
||
+;; instruction to each).
|
||
+;;
|
||
+;; [*] The non-bypass case is a latency of two, reserving E1 on the first
|
||
+;; cycle and E2 on the next. Due to the way the scheduler works we
|
||
+;; have to choose between taking this as the default and taking the
|
||
+;; above case (with latency one) as the default; we choose the latter.
|
||
+
|
||
+(define_insn_reservation "marvell_f_alu_op_bypass_e1" 1
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (not (eq_attr "insn" "mov,mvn,and,orr,eor"))))
|
||
+ "(a1_e1+a1_e2,a1_wb)|(a2_e1+a2_e2,a2_wb)")
|
||
+
|
||
+;; 3. ALU operations with a shift-by-constant operand.
|
||
+
|
||
+(define_insn_reservation "marvell_f_alu_shift_op" 2
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (and (eq_attr "type" "alu_shift")
|
||
+ (not (eq_attr "insn" "mov,mvn,and,orr,eor"))))
|
||
+ "(a1_e1,a1_e2,a1_wb)|(a2_e1,a2_e2,a2_wb)")
|
||
+
|
||
+;; 4. ALU operations with a shift-by-register operand. Since the
|
||
+;; instruction is never mov or mvn, a dual-issue constraint must
|
||
+;; be enforced.
|
||
+
|
||
+(define_insn_reservation "marvell_f_alu_shift_reg_op" 2
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (and (eq_attr "type" "alu_shift_reg")
|
||
+ (not (eq_attr "insn" "mov,mvn,and,orr,eor"))))
|
||
+ "(reg_shift_lock+a1_e1,a1_e2,a1_wb)|(reg_shift_lock+a2_e1,a2_e2,a2_wb)")
|
||
+
|
||
+;; Given an ALU operation with shift (I1) followed by another ALU
|
||
+;; operation (I2), with I2 depending on the destination register Rd of I1
|
||
+;; and with I2 not using that value as the amount or the starting value for
|
||
+;; a shift, then I1 and I2 may be issued to the same pipeline on
|
||
+;; consecutive cycles. In terms of this model that corresponds to I1
|
||
+;; having a latency of one cycle. There are three cases for various
|
||
+;; I1 and I2 as follows.
|
||
+
|
||
+;; (a) I1 has a constant or register shift and I2 doesn't have a shift at all.
|
||
+(define_bypass 1 "marvell_f_alu_shift_op,\
|
||
+ marvell_f_alu_shift_reg_op"
|
||
+ "marvell_f_alu_op_bypass_e1,marvell_f_alu_early_retire")
|
||
+
|
||
+;; (b) I1 has a constant or register shift and I2 has a constant shift.
|
||
+;; Rd must not provide the starting value for the shift.
|
||
+(define_bypass 1 "marvell_f_alu_shift_op,\
|
||
+ marvell_f_alu_shift_reg_op"
|
||
+ "marvell_f_alu_shift_op,marvell_f_alu_early_retire_shift"
|
||
+ "arm_no_early_alu_shift_value_dep")
|
||
+
|
||
+;; (c) I1 has a constant or register shift and I2 has a register shift.
|
||
+;; Rd must not provide the amount by which to shift.
|
||
+(define_bypass 1 "marvell_f_alu_shift_op,\
|
||
+ marvell_f_alu_shift_reg_op"
|
||
+ "marvell_f_alu_shift_reg_op,\
|
||
+ marvell_f_alu_early_retire_reg_shift1,\
|
||
+ marvell_f_alu_early_retire_reg_shift2"
|
||
+ "arm_no_early_alu_shift_dep")
|
||
+
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+;; Multiplication instructions
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+
|
||
+;; Multiplication instructions in group "Multiply 2".
|
||
+
|
||
+(define_insn_reservation "marvell_f_multiply_2" 3
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "insn" "mul,muls,smull,umull,smulxy,smulls,umulls"))
|
||
+ "m_1,m_2,m_3,m_wb")
|
||
+
|
||
+;; Multiplication instructions in group "Multiply 3". There is a
|
||
+;; dual-issue constraint with non-multiplication ALU instructions
|
||
+;; to be respected here.
|
||
+
|
||
+(define_insn_reservation "marvell_f_multiply_3" 3
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "insn" "mla,mlas,smlal,umlal,smlaxy,smlalxy,smlawx,\
|
||
+ smlawy,smlals,umlals"))
|
||
+ "reg_shift_lock+m_1,m_2,m_3,m_wb")
|
||
+
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+;; Branch instructions
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+
|
||
+;; Conditional backward b instructions can have a zero-cycle penalty, and
|
||
+;; other conditional b and bl instructions have a one-cycle penalty if
|
||
+;; predicted correctly. Currently we model the zero-cycle case for all
|
||
+;; branches.
|
||
+
|
||
+(define_insn_reservation "marvell_f_branches" 0
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "branch"))
|
||
+ "nothing")
|
||
+
|
||
+;; Call latencies are not predictable; a semi-arbitrary very large
|
||
+;; number is used as "positive infinity" for such latencies.
|
||
+
|
||
+(define_insn_reservation "marvell_f_call" 32
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "call"))
|
||
+ "nothing")
|
||
+
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+;; Load/store instructions
|
||
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
+
|
||
+;; The models for load/store instructions do not accurately describe
|
||
+;; the difference between operations with a base register writeback.
|
||
+;; These models assume that all memory references hit in dcache.
|
||
+
|
||
+;; 1. Load/store for single registers.
|
||
+
|
||
+;; The worst case for a load is when the load result is needed in E1
|
||
+;; (for example for a register shift), giving a latency of four. Loads
|
||
+;; skip E1 and access memory at the E2 stage.
|
||
+
|
||
+(define_insn_reservation "marvell_f_load1" 4
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "load1,load_byte"))
|
||
+ "a1_e2+sram,a1_of,a1_wr,a1_wb")
|
||
+
|
||
+;; The result for a load may be bypassed (to be available at the same
|
||
+;; time as the load arrives in the WR stage, so effectively at the OF
|
||
+;; stage) to the Rn operand at E2 with a latency of two. The result may
|
||
+;; be bypassed to a non-Rn operand at E2 with a latency of three. For
|
||
+;; instructions without shifts, detection of an Rn bypass situation is
|
||
+;; difficult (because some of the instruction patterns switch their
|
||
+;; operands), and so we do not model that here. For instructions with
|
||
+;; shifts, the operand used at E2 will always be Rn, and so we can
|
||
+;; model the latency-two bypass for these.
|
||
+
|
||
+(define_bypass 2 "marvell_f_load1"
|
||
+ "marvell_f_alu_shift_op"
|
||
+ "arm_no_early_alu_shift_value_dep")
|
||
+
|
||
+(define_bypass 2 "marvell_f_load1"
|
||
+ "marvell_f_alu_shift_reg_op"
|
||
+ "arm_no_early_alu_shift_dep")
|
||
+
|
||
+;; Stores write at the WR stage and loads read at the E2 stage, giving
|
||
+;; a store latency of three.
|
||
+
|
||
+(define_insn_reservation "marvell_f_store1" 3
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "store1"))
|
||
+ "a1_e2,a1_of,a1_wr+sram,a1_wb")
|
||
+
|
||
+;; 2. Load/store for two consecutive registers. These may be dealt
|
||
+;; with in the same number of cycles as single loads and stores.
|
||
+
|
||
+(define_insn_reservation "marvell_f_load2" 4
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "load2"))
|
||
+ "a1_e2+sram,a1_of,a1_wr,a1_wb")
|
||
+
|
||
+(define_insn_reservation "marvell_f_store2" 3
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "store2"))
|
||
+ "a1_e2,a1_of,a1_wr+sram,a1_wb")
|
||
+
|
||
+;; The first word of a doubleword load is eligible for the latency-two
|
||
+;; bypass described above for single loads, but this is not modelled here.
|
||
+;; We do however assume that either word may also be bypassed with
|
||
+;; latency three for ALU operations with shifts (where the shift value and
|
||
+;; amount do not depend on the loaded value) and latency four for ALU
|
||
+;; operations without shifts. The latency four case is of course the default.
|
||
+
|
||
+(define_bypass 3 "marvell_f_load2"
|
||
+ "marvell_f_alu_shift_op"
|
||
+ "arm_no_early_alu_shift_value_dep")
|
||
+
|
||
+(define_bypass 3 "marvell_f_load2"
|
||
+ "marvell_f_alu_shift_reg_op"
|
||
+ "arm_no_early_alu_shift_dep")
|
||
+
|
||
+;; 3. Load/store for more than two registers.
|
||
+
|
||
+;; These instructions stall for an extra cycle in the decode stage;
|
||
+;; individual load/store instructions for each register are then issued.
|
||
+;; The load/store multiple instruction itself is removed from the decode
|
||
+;; stage at the same time as the final load/store instruction is issued.
|
||
+;; To complicate matters, pairs of loads/stores referencing two
|
||
+;; consecutive registers will be issued together as doubleword operations.
|
||
+;; We model a 3-word load as an LDR plus an LDRD, and a 4-word load
|
||
+;; as two LDRDs; thus, these are allocated the same latencies (the
|
||
+;; latency for two consecutive loads plus one for the setup stall).
|
||
+;; The extra stall is modelled by reserving E1.
|
||
+
|
||
+(define_insn_reservation "marvell_f_load3_4" 6
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "load3,load4"))
|
||
+ "a1_e1,a1_e1+a1_e2+sram,a1_e2+sram+a1_of,a1_of+a1_wr,a1_wr+a1_wb,a1_wb")
|
||
+
|
||
+;; Bypasses are possible for ldm as for single loads, but we do not
|
||
+;; model them here since the order of the constituent loads is
|
||
+;; difficult to predict.
|
||
+
|
||
+(define_insn_reservation "marvell_f_store3_4" 5
|
||
+ (and (eq_attr "tune" "marvell_f")
|
||
+ (eq_attr "type" "store3,store4"))
|
||
+ "a1_e1,a1_e1+a1_e2,a1_e2+a1_of,a1_of+a1_wr+sram,a1_wr+sram+a1_wb,a1_wb")
|
||
+
|
||
--- /dev/null
+++ b/gcc/config/arm/marvell-f-vfp.md
@@ -0,0 +1,153 @@
+;; Marvell 2850 VFP pipeline description
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;; Written by CodeSourcery, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;; This automaton provides a pipeline description for the Marvell
+;; 2850 core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "marvell_f_vfp")
+
+;; This is a single-issue VFPv2 implementation with the following execution
+;; units:
+;;
+;; 1. Addition/subtraction unit; takes three cycles, pipelined.
+;; 2. Multiplication unit; takes four cycles, pipelined.
+;; 3. Add buffer, used for multiply-accumulate (see below).
+;; 4. Divide/square root unit, not pipelined.
+;; For single-precision: takes sixteen cycles, can accept another insn
+;; after fifteen cycles.
+;; For double-precision: takes thirty-one cycles, can accept another insn
+;; after thirty cycles.
+;; 5. Single-cycle unit, pipelined.
+;; This does absolute value/copy/negate/compare in one cycle and
+;; conversion in two cycles.
+;;
+;; When all three operands of a multiply-accumulate instruction are ready,
+;; one is issued to the add buffer (which can hold six operands in a FIFO)
+;; and the two to be multiplied are issued to the multiply unit. After
+;; four cycles in the multiply unit, one cycle is taken to issue the
+;; operand from the add buffer plus the multiplication result to the
+;; addition/subtraction unit. That issue takes priority over any add/sub
+;; instruction waiting at the normal issue stage, but may be performed in
+;; parallel with the issue of a non-add/sub instruction. The total time
+;; for a multiply-accumulate instruction to pass through the execution
+;; units is hence eight cycles.
+;;
+;; We do not need to explicitly model the add buffer because it can
+;; always issue the instruction at the head of its FIFO (due to the above
+;; priority rule) and there are more spaces in the add buffer (six) than
+;; there are stages (four) in the multiplication unit.
+;;
+;; Two instructions may be retired at once from the head of an 8-entry
+;; reorder buffer. Data from these first two instructions only may be
+;; forwarded to the inputs of the issue unit. We assume that the
+;; pressure on the reorder buffer will be sufficiently low that every
+;; instruction entering it will be eligible for data forwarding. Since
+;; data is forwarded to the issue unit and not the execution units (so
+;; for example single-cycle instructions cannot be issued back-to-back),
+;; the latencies given below are the cycle counts above plus one.
+
+(define_cpu_unit "mf_vfp_issue" "marvell_f_vfp")
+(define_cpu_unit "mf_vfp_add" "marvell_f_vfp")
+(define_cpu_unit "mf_vfp_mul" "marvell_f_vfp")
+(define_cpu_unit "mf_vfp_div" "marvell_f_vfp")
+(define_cpu_unit "mf_vfp_single_cycle" "marvell_f_vfp")
+
+;; An attribute to indicate whether our reservations are applicable.
+
+(define_attr "marvell_f_vfp" "yes,no"
+ (const (if_then_else (and (eq_attr "tune" "marvell_f")
+ (eq_attr "fpu" "vfp"))
+ (const_string "yes") (const_string "no"))))
+
+;; Reservations of functional units. The nothing*2 reservations at the
+;; start of many of the reservation strings correspond to the decode
+;; stages. We need to have these reservations so that we can correctly
+;; reserve parts of the core's A1 pipeline for loads and stores. For
+;; that case (since loads skip E1) the pipelines line up thus:
+;; A1 pipe: Issue E2 OF WR WB ...
+;; VFP pipe: Fetch Decode1 Decode2 Issue Execute1 ...
+;; For a load, we need to make a reservation of E2, and thus we must
+;; use Decode1 as the starting point for all VFP reservations here.
+;;
+;; For reservations of pipelined VFP execution units we only reserve
+;; the execution unit for the first execution cycle, omitting any trailing
+;; "nothing" reservations.
+
+(define_insn_reservation "marvell_f_vfp_add" 4
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "farith"))
+ "nothing*2,mf_vfp_issue,mf_vfp_add")
+
+(define_insn_reservation "marvell_f_vfp_mul" 5
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "fmuls,fmuld"))
+ "nothing*2,mf_vfp_issue,mf_vfp_mul")
+
+(define_insn_reservation "marvell_f_vfp_divs" 17
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "fdivs"))
+ "nothing*2,mf_vfp_issue,mf_vfp_div*15")
+
+(define_insn_reservation "marvell_f_vfp_divd" 32
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "fdivd"))
+ "nothing*2,mf_vfp_issue,mf_vfp_div*30")
+
+;; The DFA lookahead is small enough that the "add" reservation here
+;; will always take priority over any addition/subtraction instruction
+;; issued five cycles after the multiply-accumulate instruction, as
+;; required.
+(define_insn_reservation "marvell_f_vfp_mac" 9
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "fmacs,fmacd"))
+ "nothing*2,mf_vfp_issue,mf_vfp_mul,nothing*4,mf_vfp_add")
+
+(define_insn_reservation "marvell_f_vfp_single" 2
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "ffarith"))
+ "nothing*2,mf_vfp_issue,mf_vfp_single_cycle")
+
+(define_insn_reservation "marvell_f_vfp_convert" 3
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "f_cvt"))
+ "nothing*2,mf_vfp_issue,mf_vfp_single_cycle")
+
+(define_insn_reservation "marvell_f_vfp_load" 2
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "a1_e2+sram,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle")
+
+(define_insn_reservation "marvell_f_vfp_from_core" 2
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "r_2_f"))
+ "a1_e2,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle")
+
+;; The interaction between the core and VFP pipelines during VFP
+;; store operations and core <-> VFP moves is not clear, so we guess.
+(define_insn_reservation "marvell_f_vfp_store" 3
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "f_stores,f_stored"))
+ "a1_e2,a1_of,mf_vfp_issue,a1_wr+sram+mf_vfp_single_cycle")
+
+(define_insn_reservation "marvell_f_vfp_to_core" 4
+ (and (eq_attr "marvell_f_vfp" "yes")
+ (eq_attr "type" "f_2_r"))
+ "a1_e2,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle")
+
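The reservation latencies in the new marvell-f-vfp.md above are the execution-unit cycle counts from its header comment plus one, because results are forwarded to the issue stage rather than directly to the execution units. A small C cross-check of that relationship (an editor's illustrative sketch, not part of the patch):

#include <assert.h>

/* Unit cycle counts from the marvell-f-vfp.md comments, paired with the
   latencies declared in its insn reservations.  */
struct mf_vfp_timing { const char *reservation; int unit_cycles; int md_latency; };

static const struct mf_vfp_timing mf_vfp_timings[] = {
  { "marvell_f_vfp_add",      3,  4 },
  { "marvell_f_vfp_mul",      4,  5 },
  { "marvell_f_vfp_divs",    16, 17 },
  { "marvell_f_vfp_divd",    31, 32 },
  { "marvell_f_vfp_mac",      8,  9 },
  { "marvell_f_vfp_single",   1,  2 },
  { "marvell_f_vfp_convert",  2,  3 },
};

static void
mf_vfp_check_timings (void)
{
  for (unsigned int i = 0;
       i < sizeof mf_vfp_timings / sizeof mf_vfp_timings[0]; i++)
    /* Latency = execution cycles + 1 for the forwarding hop to issue.  */
    assert (mf_vfp_timings[i].md_latency == mf_vfp_timings[i].unit_cycles + 1);
}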
--- /dev/null
+++ b/gcc/config/arm/montavista-linux.h
@@ -0,0 +1,33 @@
+/* MontaVista GNU/Linux Configuration.
+ Copyright (C) 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Add -tarmv6 and -tthumb2 options for convenience in generating multilibs.
+*/
+#undef CC1_SPEC
+#define CC1_SPEC " \
+ %{tarmv6: -march=armv6 -mfloat-abi=softfp ; \
+ tthumb2: -mthumb -march=armv7-a -mfloat-abi=softfp ; \
+ : -march=armv5t}"
+
+/* The various C libraries each have their own subdirectory. */
+#undef SYSROOT_SUFFIX_SPEC
+#define SYSROOT_SUFFIX_SPEC \
+ "%{tarmv6:/armv6 ; \
+ tthumb2:/thumb2}"
--- a/gcc/config/arm/neon-gen.ml
+++ b/gcc/config/arm/neon-gen.ml
@@ -122,6 +122,7 @@
| T_uint16 | T_int16 -> T_intHI
| T_uint32 | T_int32 -> T_intSI
| T_uint64 | T_int64 -> T_intDI
+ | T_float32 -> T_floatSF
| T_poly8 -> T_intQI
| T_poly16 -> T_intHI
| T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
@@ -320,7 +321,7 @@
typeinfo;
Format.print_newline ();
(* Extra types not in <stdint.h>. *)
- Format.printf "typedef __builtin_neon_sf float32_t;\n";
+ Format.printf "typedef float float32_t;\n";
Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"
|
||
--- a/gcc/config/arm/neon.md
|
||
+++ b/gcc/config/arm/neon.md
|
||
@@ -159,7 +159,8 @@
|
||
(UNSPEC_VUZP1 201)
|
||
(UNSPEC_VUZP2 202)
|
||
(UNSPEC_VZIP1 203)
|
||
- (UNSPEC_VZIP2 204)])
|
||
+ (UNSPEC_VZIP2 204)
|
||
+ (UNSPEC_MISALIGNED_ACCESS 205)])
|
||
|
||
;; Double-width vector modes.
|
||
(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
|
||
@@ -459,7 +460,9 @@
|
||
"=w,Uv,w, w, ?r,?w,?r,?r, ?Us")
|
||
(match_operand:VD 1 "general_operand"
|
||
" w,w, Dn,Uvi, w, r, r, Usi,r"))]
|
||
- "TARGET_NEON"
|
||
+ "TARGET_NEON
|
||
+ && (register_operand (operands[0], <MODE>mode)
|
||
+ || register_operand (operands[1], <MODE>mode))"
|
||
{
|
||
if (which_alternative == 2)
|
||
{
|
||
@@ -481,7 +484,7 @@
|
||
|
||
/* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp
|
||
below must be changed to output_move_neon (which will use the
|
||
- element/structure loads/stores), and the constraint changed to 'Un' instead
|
||
+ element/structure loads/stores), and the constraint changed to 'Um' instead
|
||
of 'Uv'. */
|
||
|
||
switch (which_alternative)
|
||
@@ -506,7 +509,9 @@
|
||
"=w,Un,w, w, ?r,?w,?r,?r, ?Us")
|
||
(match_operand:VQXMOV 1 "general_operand"
|
||
" w,w, Dn,Uni, w, r, r, Usi, r"))]
|
||
- "TARGET_NEON"
|
||
+ "TARGET_NEON
|
||
+ && (register_operand (operands[0], <MODE>mode)
|
||
+ || register_operand (operands[1], <MODE>mode))"
|
||
{
|
||
if (which_alternative == 2)
|
||
{
|
||
@@ -549,6 +554,11 @@
|
||
(match_operand:TI 1 "general_operand" ""))]
|
||
"TARGET_NEON"
|
||
{
|
||
+ if (can_create_pseudo_p ())
|
||
+ {
|
||
+ if (GET_CODE (operands[0]) != REG)
|
||
+ operands[1] = force_reg (TImode, operands[1]);
|
||
+ }
|
||
})
|
||
|
||
(define_expand "mov<mode>"
|
||
@@ -556,12 +566,19 @@
|
||
(match_operand:VSTRUCT 1 "general_operand" ""))]
|
||
"TARGET_NEON"
|
||
{
|
||
+ if (can_create_pseudo_p ())
|
||
+ {
|
||
+ if (GET_CODE (operands[0]) != REG)
|
||
+ operands[1] = force_reg (<MODE>mode, operands[1]);
|
||
+ }
|
||
})
|
||
|
||
(define_insn "*neon_mov<mode>"
|
||
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
|
||
(match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
|
||
- "TARGET_NEON"
|
||
+ "TARGET_NEON
|
||
+ && (register_operand (operands[0], <MODE>mode)
|
||
+ || register_operand (operands[1], <MODE>mode))"
|
||
{
|
||
switch (which_alternative)
|
||
{
|
||
@@ -658,6 +675,49 @@
|
||
neon_disambiguate_copy (operands, dest, src, 4);
|
||
})
|
||
|
||
+(define_expand "movmisalign<mode>"
|
||
+ [(set (match_operand:VDQX 0 "nonimmediate_operand" "")
|
||
+ (unspec:VDQX [(match_operand:VDQX 1 "general_operand" "")]
|
||
+ UNSPEC_MISALIGNED_ACCESS))]
|
||
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
|
||
+{
|
||
+ if (!s_register_operand (operands[0], <MODE>mode)
|
||
+ && !s_register_operand (operands[1], <MODE>mode))
|
||
+ FAIL;
|
||
+})
|
||
+
|
||
+(define_insn "*movmisalign<mode>_neon_store"
|
||
+ [(set (match_operand:VDX 0 "memory_operand" "=Um")
|
||
+ (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
|
||
+ UNSPEC_MISALIGNED_ACCESS))]
|
||
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
|
||
+ "vst1.<V_sz_elem>\t{%P1}, %A0"
|
||
+ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
|
||
+
|
||
+(define_insn "*movmisalign<mode>_neon_load"
|
||
+ [(set (match_operand:VDX 0 "s_register_operand" "=w")
|
||
+ (unspec:VDX [(match_operand:VDX 1 "memory_operand" " Um")]
|
||
+ UNSPEC_MISALIGNED_ACCESS))]
|
||
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
|
||
+ "vld1.<V_sz_elem>\t{%P0}, %A1"
|
||
+ [(set_attr "neon_type" "neon_vld1_1_2_regs")])
|
||
+
|
||
+(define_insn "*movmisalign<mode>_neon_store"
|
||
+ [(set (match_operand:VQX 0 "memory_operand" "=Um")
|
||
+ (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
|
||
+ UNSPEC_MISALIGNED_ACCESS))]
|
||
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
|
||
+ "vst1.<V_sz_elem>\t{%q1}, %A0"
|
||
+ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
|
||
+
|
||
+(define_insn "*movmisalign<mode>_neon_load"
|
||
+ [(set (match_operand:VQX 0 "s_register_operand" "=w")
|
||
+ (unspec:VQX [(match_operand:VQX 1 "general_operand" " Um")]
|
||
+ UNSPEC_MISALIGNED_ACCESS))]
|
||
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
|
||
+ "vld1.<V_sz_elem>\t{%q0}, %A1"
|
||
+ [(set_attr "neon_type" "neon_vld1_1_2_regs")])
|
||
+
|
||
(define_insn "vec_set<mode>_internal"
|
||
[(set (match_operand:VD 0 "s_register_operand" "=w")
|
||
(vec_merge:VD
|
||
@@ -862,6 +922,50 @@
|
||
(const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
|
||
)
|
||
|
||
+(define_insn "*mul<mode>3add<mode>_neon"
|
||
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
|
||
+ (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
|
||
+ (match_operand:VDQ 3 "s_register_operand" "w"))
|
||
+ (match_operand:VDQ 1 "s_register_operand" "0")))]
|
||
+ "TARGET_NEON"
|
||
+ "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
|
||
+ [(set (attr "neon_type")
|
||
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
|
||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||
+ (const_string "neon_fp_vmla_ddd")
|
||
+ (const_string "neon_fp_vmla_qqq"))
|
||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||
+ (if_then_else
|
||
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
|
||
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
|
||
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
|
||
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
|
||
+ (const_string "neon_mla_qqq_8_16")
|
||
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
|
||
+)
|
||
+
|
||
+(define_insn "*mul<mode>3neg<mode>add<mode>_neon"
|
||
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
|
||
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0")
|
||
+ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
|
||
+ (match_operand:VDQ 3 "s_register_operand" "w"))))]
|
||
+ "TARGET_NEON"
|
||
+ "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
|
||
+ [(set (attr "neon_type")
|
||
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
|
||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||
+ (const_string "neon_fp_vmla_ddd")
|
||
+ (const_string "neon_fp_vmla_qqq"))
|
||
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
|
||
+ (if_then_else
|
||
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
|
||
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
|
||
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
|
||
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
|
||
+ (const_string "neon_mla_qqq_8_16")
|
||
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
|
||
+)
|
||
+
|
||
(define_insn "ior<mode>3"
|
||
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
|
||
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
|
||
@@ -3611,7 +3715,8 @@
|
||
UNSPEC_VSHLL_N))]
|
||
"TARGET_NEON"
|
||
{
|
||
- neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
|
||
+ /* The boundaries are: 0 < imm <= size. */
|
||
+ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
|
||
return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
|
||
}
|
||
[(set_attr "neon_type" "neon_shift_1")]
|
||
--- a/gcc/config/arm/neon.ml
|
||
+++ b/gcc/config/arm/neon.ml
|
||
@@ -50,7 +50,7 @@
|
||
| T_ptrto of vectype | T_const of vectype
|
||
| T_void | T_intQI
|
||
| T_intHI | T_intSI
|
||
- | T_intDI
|
||
+ | T_intDI | T_floatSF
|
||
|
||
(* The meanings of the following are:
|
||
TImode : "Tetra", two registers (four words).
|
||
@@ -1693,6 +1693,7 @@
|
||
| T_intHI -> "__builtin_neon_hi"
|
||
| T_intSI -> "__builtin_neon_si"
|
||
| T_intDI -> "__builtin_neon_di"
|
||
+ | T_floatSF -> "__builtin_neon_sf"
|
||
| T_arrayof (num, base) ->
|
||
let basename = name (fun x -> x) base in
|
||
affix (Printf.sprintf "%sx%d" basename num)
|
||
--- a/gcc/config/arm/neon-testgen.ml
|
||
+++ b/gcc/config/arm/neon-testgen.ml
|
||
@@ -51,8 +51,8 @@
|
||
Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n";
|
||
Printf.fprintf chan "/* { dg-do assemble } */\n";
|
||
Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n";
|
||
- Printf.fprintf chan
|
||
- "/* { dg-options \"-save-temps -O0 -mfpu=neon -mfloat-abi=softfp\" } */\n";
|
||
+ Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n";
|
||
+ Printf.fprintf chan "/* { dg-add-options arm_neon } */\n";
|
||
Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n";
|
||
Printf.fprintf chan "void test_%s (void)\n{\n" test_name
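With this change a generated Neon test no longer hard-codes -mfpu=neon -mfloat-abi=softfp; its header comes from the prints above plus dg-add-options. A rough reconstruction of the emitted preamble (illustrative only, not copied from an actual generated file; the function name depends on the intrinsic under test):

/* This file was autogenerated by neon-testgen. */

/* { dg-do assemble } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neon } */

#include "arm_neon.h"

void test_some_intrinsic (void)   /* placeholder name */
{
  /* ... intrinsic call emitted by neon-testgen ... */
}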
|
||
|
||
--- a/gcc/config/arm/netbsd-elf.h
|
||
+++ b/gcc/config/arm/netbsd-elf.h
|
||
@@ -153,5 +153,5 @@
|
||
while (0)
|
||
|
||
#undef FPUTYPE_DEFAULT
|
||
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
|
||
+#define FPUTYPE_DEFAULT "vfp"
|
||
|
||
--- /dev/null
|
||
+++ b/gcc/config/arm/nocrt0.h
|
||
@@ -0,0 +1,25 @@
|
||
+/* Definitions for generic libgloss based configs where crt0 is supplied by
|
||
+ the linker script.
|
||
+ Copyright (C) 2006 Free Software Foundation, Inc.
|
||
+
|
||
+ This file is part of GCC.
|
||
+
|
||
+ GCC is free software; you can redistribute it and/or modify it
|
||
+ under the terms of the GNU General Public License as published
|
||
+ by the Free Software Foundation; either version 3, or (at your
|
||
+ option) any later version.
|
||
+
|
||
+ GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||
+ License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with GCC; see the file COPYING3. If not see
|
||
+ <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+#undef STARTFILE_SPEC
|
||
+#define STARTFILE_SPEC " crti%O%s crtbegin%O%s"
|
||
+
|
||
+#undef LIB_SPEC
|
||
+#define LIB_SPEC "-lc"
|
||
--- a/gcc/config/arm/predicates.md
|
||
+++ b/gcc/config/arm/predicates.md
|
||
@@ -73,6 +73,10 @@
|
||
|| REGNO_REG_CLASS (REGNO (op)) == FPA_REGS));
|
||
})
|
||
|
||
+(define_special_predicate "subreg_lowpart_operator"
|
||
+ (and (match_code "subreg")
|
||
+ (match_test "subreg_lowpart_p (op)")))
|
||
+
|
||
;; Reg, subreg(reg) or const_int.
|
||
(define_predicate "reg_or_int_operand"
|
||
(ior (match_code "const_int")
|
||
@@ -168,6 +172,11 @@
|
||
(and (match_code "plus,minus,ior,xor,and")
|
||
(match_test "mode == GET_MODE (op)")))
|
||
|
||
+;; True for plus/minus operators
|
||
+(define_special_predicate "plusminus_operator"
|
||
+ (and (match_code "plus,minus")
|
||
+ (match_test "mode == GET_MODE (op)")))
|
||
+
|
||
;; True for logical binary operators.
|
||
(define_special_predicate "logical_binary_operator"
|
||
(and (match_code "ior,xor,and")
|
||
@@ -295,6 +304,9 @@
|
||
HOST_WIDE_INT i = 1, base = 0;
|
||
rtx elt;
|
||
|
||
+ if (low_irq_latency)
|
||
+ return false;
|
||
+
|
||
if (count <= 1
|
||
|| GET_CODE (XVECEXP (op, 0, 0)) != SET)
|
||
return false;
|
||
@@ -352,6 +364,9 @@
|
||
HOST_WIDE_INT i = 1, base = 0;
|
||
rtx elt;
|
||
|
||
+ if (low_irq_latency)
|
||
+ return false;
|
||
+
|
||
if (count <= 1
|
||
|| GET_CODE (XVECEXP (op, 0, 0)) != SET)
|
||
return false;
|
||
--- a/gcc/config/arm/sfp-machine.h
|
||
+++ b/gcc/config/arm/sfp-machine.h
|
||
@@ -14,9 +14,11 @@
|
||
#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
|
||
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
|
||
|
||
+#define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1)
|
||
#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
|
||
#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
|
||
#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
|
||
+#define _FP_NANSIGN_H 0
|
||
#define _FP_NANSIGN_S 0
|
||
#define _FP_NANSIGN_D 0
|
||
#define _FP_NANSIGN_Q 0
|
||
@@ -92,5 +94,7 @@
|
||
#define __fixdfdi __aeabi_d2lz
|
||
#define __fixunsdfdi __aeabi_d2ulz
|
||
#define __floatdidf __aeabi_l2d
|
||
+#define __extendhfsf2 __gnu_h2f_ieee
|
||
+#define __truncsfhf2 __gnu_f2h_ieee
|
||
|
||
#endif /* __ARM_EABI__ */
|
||
--- a/gcc/config/arm/t-arm
|
||
+++ b/gcc/config/arm/t-arm
|
||
@@ -13,7 +13,9 @@
|
||
$(srcdir)/config/arm/iwmmxt.md \
|
||
$(srcdir)/config/arm/vfp.md \
|
||
$(srcdir)/config/arm/neon.md \
|
||
- $(srcdir)/config/arm/thumb2.md
|
||
+ $(srcdir)/config/arm/thumb2.md \
|
||
+ $(srcdir)/config/arm/marvell-f.md \
|
||
+ $(srcdir)/config/arm/hwdiv.md
|
||
|
||
s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \
|
||
s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES)
|
||
--- a/gcc/config/arm/t-arm-elf
|
||
+++ b/gcc/config/arm/t-arm-elf
|
||
@@ -24,10 +24,18 @@
|
||
#MULTILIB_MATCHES += march?armv7=march?armv7-a
|
||
#MULTILIB_MATCHES += march?armv7=march?armv7-r
|
||
#MULTILIB_MATCHES += march?armv7=march?armv7-m
|
||
+#MULTILIB_MATCHES += march?armv7=march?armv7e-m
|
||
#MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8
|
||
#MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4
|
||
#MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3
|
||
|
||
+# Not quite true. We can support hard-vfp calling in Thumb2, but how do we
|
||
+# express that here? Also, we really need architecture v5e or later
|
||
+# (mcrr etc).
|
||
+MULTILIB_OPTIONS += mfloat-abi=hard
|
||
+MULTILIB_DIRNAMES += fpu
|
||
+MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard*
|
||
+
|
||
# MULTILIB_OPTIONS += mcpu=ep9312
|
||
# MULTILIB_DIRNAMES += ep9312
|
||
# MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312*
|
||
--- a/gcc/config/arm/t-bpabi
|
||
+++ b/gcc/config/arm/t-bpabi
|
||
@@ -1,10 +1,13 @@
|
||
# Add the bpabi.S functions.
|
||
-LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod
|
||
+LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod \
|
||
+ _aeabi_idiv0 _aeabi_ldiv0
|
||
|
||
# Add the BPABI C functions.
|
||
LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \
|
||
$(srcdir)/config/arm/unaligned-funcs.c
|
||
|
||
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c
|
||
+
|
||
UNWIND_H = $(srcdir)/config/arm/unwind-arm.h
|
||
LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \
|
||
$(srcdir)/config/arm/libunwind.S \
|
||
--- a/gcc/config/arm/thumb2.md
|
||
+++ b/gcc/config/arm/thumb2.md
|
||
@@ -24,6 +24,8 @@
|
||
;; changes made in armv5t as "thumb2". These are considered part
|
||
;; the 16-bit Thumb-1 instruction set.
|
||
|
||
+(include "hwdiv.md")
|
||
+
|
||
(define_insn "*thumb2_incscc"
|
||
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
|
||
(plus:SI (match_operator:SI 2 "arm_comparison_operator"
|
||
@@ -172,34 +174,6 @@
|
||
(set_attr "length" "8")]
|
||
)
|
||
|
||
-(define_insn "*thumb2_abssi2"
|
||
- [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
|
||
- (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
|
||
- (clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_THUMB2"
|
||
- "@
|
||
- cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0
|
||
- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
|
||
- [(set_attr "conds" "clob,*")
|
||
- (set_attr "shift" "1")
|
||
- ;; predicable can't be set based on the variant, so left as no
|
||
- (set_attr "length" "10,8")]
|
||
-)
|
||
-
|
||
-(define_insn "*thumb2_neg_abssi2"
|
||
- [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
|
||
- (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
|
||
- (clobber (reg:CC CC_REGNUM))]
|
||
- "TARGET_THUMB2"
|
||
- "@
|
||
- cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0
|
||
- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
|
||
- [(set_attr "conds" "clob,*")
|
||
- (set_attr "shift" "1")
|
||
- ;; predicable can't be set based on the variant, so left as no
|
||
- (set_attr "length" "10,8")]
|
||
-)
|
||
-
|
||
(define_insn "*thumb2_movdi"
|
||
[(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
|
||
(match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))]
|
||
@@ -223,9 +197,14 @@
|
||
(set_attr "neg_pool_range" "*,*,*,0,*")]
|
||
)
|
||
|
||
+;; We have two alternatives here for memory loads (and similarly for stores)
|
||
+;; to reflect the fact that the permissible constant pool ranges differ
|
||
+;; between ldr instructions taking low regs and ldr instructions taking high
|
||
+;; regs. The high register alternatives are not taken into account when
|
||
+;; choosing register preferences in order to reflect their expense.
|
||
(define_insn "*thumb2_movsi_insn"
|
||
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m")
|
||
- (match_operand:SI 1 "general_operand" "rk ,I,K,N,mi,rk"))]
|
||
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l,*hk,m,*m")
|
||
+ (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))]
|
||
"TARGET_THUMB2 && ! TARGET_IWMMXT
|
||
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
|
||
&& ( register_operand (operands[0], SImode)
|
||
@@ -236,11 +215,13 @@
|
||
mvn%?\\t%0, #%B1
|
||
movw%?\\t%0, %1
|
||
ldr%?\\t%0, %1
|
||
+ ldr%?\\t%0, %1
|
||
+ str%?\\t%1, %0
|
||
str%?\\t%1, %0"
|
||
- [(set_attr "type" "*,*,*,*,load1,store1")
|
||
+ [(set_attr "type" "*,*,*,*,load1,load1,store1,store1")
|
||
(set_attr "predicable" "yes")
|
||
- (set_attr "pool_range" "*,*,*,*,4096,*")
|
||
- (set_attr "neg_pool_range" "*,*,*,*,0,*")]
|
||
+ (set_attr "pool_range" "*,*,*,*,1020,4096,*,*")
|
||
+ (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")]
|
||
)
|
||
|
||
;; ??? We can probably do better with thumb2
|
||
@@ -1128,27 +1109,7 @@
|
||
return \"add%!\\t%0, %1, %2\";
|
||
"
|
||
[(set_attr "predicable" "yes")
|
||
- (set_attr "length" "2")]
|
||
-)
|
||
-
|
||
-(define_insn "divsi3"
|
||
- [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
- (div:SI (match_operand:SI 1 "s_register_operand" "r")
|
||
- (match_operand:SI 2 "s_register_operand" "r")))]
|
||
- "TARGET_THUMB2 && arm_arch_hwdiv"
|
||
- "sdiv%?\t%0, %1, %2"
|
||
- [(set_attr "predicable" "yes")
|
||
- (set_attr "insn" "sdiv")]
|
||
-)
|
||
-
|
||
-(define_insn "udivsi3"
|
||
- [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||
- (udiv:SI (match_operand:SI 1 "s_register_operand" "r")
|
||
- (match_operand:SI 2 "s_register_operand" "r")))]
|
||
- "TARGET_THUMB2 && arm_arch_hwdiv"
|
||
- "udiv%?\t%0, %1, %2"
|
||
- [(set_attr "predicable" "yes")
|
||
- (set_attr "insn" "udiv")]
|
||
+ (set_attr "length" "4")]
|
||
)
|
||
|
||
(define_insn "*thumb2_subsi_short"
|
||
@@ -1162,6 +1123,71 @@
|
||
(set_attr "length" "2")]
|
||
)
|
||
|
||
+;; 16-bit encodings of "muls" and "mul<c>". We only use these when
|
||
+;; optimizing for size since "muls" is slow on all known
|
||
+;; implementations and since "mul<c>" will be generated by
|
||
+;; "*arm_mulsi3_v6" anyhow. The assembler will use a 16-bit encoding
|
||
+;; for "mul<c>" whenever possible anyhow.
|
||
+(define_peephole2
|
||
+ [(set (match_operand:SI 0 "low_register_operand" "")
|
||
+ (mult:SI (match_operand:SI 1 "low_register_operand" "")
|
||
+ (match_dup 0)))]
|
||
+ "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
|
||
+ [(parallel
|
||
+ [(set (match_dup 0)
|
||
+ (mult:SI (match_dup 0) (match_dup 1)))
|
||
+ (clobber (reg:CC CC_REGNUM))])]
|
||
+ ""
|
||
+)
|
||
+
|
||
+(define_peephole2
|
||
+ [(set (match_operand:SI 0 "low_register_operand" "")
|
||
+ (mult:SI (match_dup 0)
|
||
+ (match_operand:SI 1 "low_register_operand" "")))]
|
||
+ "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
|
||
+ [(parallel
|
||
+ [(set (match_dup 0)
|
||
+ (mult:SI (match_dup 0) (match_dup 1)))
|
||
+ (clobber (reg:CC CC_REGNUM))])]
|
||
+ ""
|
||
+)
|
||
+
|
||
+(define_insn "*thumb2_mulsi_short"
|
||
+ [(set (match_operand:SI 0 "low_register_operand" "=l")
|
||
+ (mult:SI (match_operand:SI 1 "low_register_operand" "%0")
|
||
+ (match_operand:SI 2 "low_register_operand" "l")))
|
||
+ (clobber (reg:CC CC_REGNUM))]
|
||
+ "TARGET_THUMB2 && optimize_size && reload_completed"
|
||
+ "mul%!\\t%0, %2, %0"
|
||
+ [(set_attr "predicable" "yes")
|
||
+ (set_attr "length" "2")
|
||
+ (set_attr "insn" "muls")])
|
||
+
|
||
+(define_insn "*thumb2_mulsi_short_compare0"
|
||
+ [(set (reg:CC_NOOV CC_REGNUM)
|
||
+ (compare:CC_NOOV
|
||
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
|
||
+ (match_operand:SI 2 "register_operand" "l"))
|
||
+ (const_int 0)))
|
||
+ (set (match_operand:SI 0 "register_operand" "=l")
|
||
+ (mult:SI (match_dup 1) (match_dup 2)))]
|
||
+ "TARGET_THUMB2 && optimize_size"
|
||
+ "muls\\t%0, %2, %0"
|
||
+ [(set_attr "length" "2")
|
||
+ (set_attr "insn" "muls")])
|
||
+
|
||
+(define_insn "*thumb2_mulsi_short_compare0_scratch"
|
||
+ [(set (reg:CC_NOOV CC_REGNUM)
|
||
+ (compare:CC_NOOV
|
||
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
|
||
+ (match_operand:SI 2 "register_operand" "l"))
|
||
+ (const_int 0)))
|
||
+ (clobber (match_scratch:SI 0 "=r"))]
|
||
+ "TARGET_THUMB2 && optimize_size"
|
||
+ "muls\\t%0, %2, %0"
|
||
+ [(set_attr "length" "2")
|
||
+ (set_attr "insn" "muls")])
|
||
+
|
||
(define_insn "*thumb2_cbz"
|
||
[(set (pc) (if_then_else
|
||
(eq (match_operand:SI 0 "s_register_operand" "l,?r")
|
||
@@ -1171,7 +1197,7 @@
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
"TARGET_THUMB2"
|
||
"*
|
||
- if (get_attr_length (insn) == 2 && which_alternative == 0)
|
||
+ if (get_attr_length (insn) == 2)
|
||
return \"cbz\\t%0, %l1\";
|
||
else
|
||
return \"cmp\\t%0, #0\;beq\\t%l1\";
|
||
@@ -1179,7 +1205,8 @@
|
||
[(set (attr "length")
|
||
(if_then_else
|
||
(and (ge (minus (match_dup 1) (pc)) (const_int 2))
|
||
- (le (minus (match_dup 1) (pc)) (const_int 128)))
|
||
+ (le (minus (match_dup 1) (pc)) (const_int 128))
|
||
+ (eq (symbol_ref ("which_alternative")) (const_int 0)))
|
||
(const_int 2)
|
||
(const_int 8)))]
|
||
)
|
||
@@ -1193,7 +1220,7 @@
|
||
(clobber (reg:CC CC_REGNUM))]
|
||
"TARGET_THUMB2"
|
||
"*
|
||
- if (get_attr_length (insn) == 2 && which_alternative == 0)
|
||
+ if (get_attr_length (insn) == 2)
|
||
return \"cbnz\\t%0, %l1\";
|
||
else
|
||
return \"cmp\\t%0, #0\;bne\\t%l1\";
|
||
@@ -1201,7 +1228,8 @@
|
||
[(set (attr "length")
|
||
(if_then_else
|
||
(and (ge (minus (match_dup 1) (pc)) (const_int 2))
|
||
- (le (minus (match_dup 1) (pc)) (const_int 128)))
|
||
+ (le (minus (match_dup 1) (pc)) (const_int 128))
|
||
+ (eq (symbol_ref ("which_alternative")) (const_int 0)))
|
||
(const_int 2)
|
||
(const_int 8)))]
|
||
)
|
||
--- a/gcc/config/arm/t-linux-eabi
|
||
+++ b/gcc/config/arm/t-linux-eabi
|
||
@@ -6,8 +6,8 @@
|
||
MULTILIB_OPTIONS =
|
||
MULTILIB_DIRNAMES =
|
||
|
||
-# Use a version of div0 which raises SIGFPE.
|
||
-LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx
|
||
+# Use a version of div0 which raises SIGFPE, and a special __clear_cache.
|
||
+LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
|
||
|
||
# Multilib the standard Linux files. Don't include crti.o or crtn.o,
|
||
# which are provided by glibc.
|
||
--- a/gcc/config/arm/t-symbian
|
||
+++ b/gcc/config/arm/t-symbian
|
||
@@ -17,6 +17,9 @@
|
||
LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c
|
||
LIB2ADDEHDEP = $(UNWIND_H)
|
||
|
||
+# Include half-float helpers.
|
||
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c
|
||
+
|
||
# Create a multilib for processors with VFP floating-point, and a
|
||
# multilib for those without -- using the soft-float ABI in both
|
||
# cases. Symbian OS object should be compiled with interworking
|
||
--- a/gcc/config/arm/uclinux-eabi.h
|
||
+++ b/gcc/config/arm/uclinux-eabi.h
|
||
@@ -50,6 +50,10 @@
|
||
#undef ARM_DEFAULT_ABI
|
||
#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX
|
||
|
||
+#undef LINK_GCC_C_SEQUENCE_SPEC
|
||
+#define LINK_GCC_C_SEQUENCE_SPEC \
|
||
+ "--start-group %G %L --end-group"
|
||
+
|
||
/* Clear the instruction cache from `beg' to `end'. This makes an
|
||
inline system call to SYS_cacheflush. */
|
||
#undef CLEAR_INSN_CACHE
|
||
--- a/gcc/config/arm/unwind-arm.c
|
||
+++ b/gcc/config/arm/unwind-arm.c
|
||
@@ -1000,7 +1000,6 @@
|
||
while (code != _URC_END_OF_STACK
|
||
&& code != _URC_FAILURE);
|
||
|
||
- finish:
|
||
restore_non_core_regs (&saved_vrs);
|
||
return code;
|
||
}
|
||
@@ -1168,6 +1167,9 @@
|
||
{
|
||
matched = (void *)(ucbp + 1);
|
||
rtti = _Unwind_decode_target2 ((_uw) &data[i + 1]);
|
||
+ /* There is no way to encode an exception
|
||
+ specification for 'class X * &', so
|
||
+ always pass false for is_reference. */
|
||
if (__cxa_type_match (ucbp, (type_info *) rtti, 0,
|
||
&matched))
|
||
break;
|
||
@@ -1197,8 +1199,6 @@
|
||
ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1];
|
||
|
||
if (data[0] & uint32_highbit)
|
||
- phase2_call_unexpected_after_unwind = 1;
|
||
- else
|
||
{
|
||
data += rtti_count + 1;
|
||
/* Setup for entry to the handler. */
|
||
@@ -1208,6 +1208,8 @@
|
||
_Unwind_SetGR (context, 0, (_uw) ucbp);
|
||
return _URC_INSTALL_CONTEXT;
|
||
}
|
||
+ else
|
||
+ phase2_call_unexpected_after_unwind = 1;
|
||
}
|
||
if (data[0] & uint32_highbit)
|
||
data++;
|
||
--- a/gcc/config/arm/unwind-arm.h
|
||
+++ b/gcc/config/arm/unwind-arm.h
|
||
@@ -229,9 +229,10 @@
|
||
return 0;
|
||
|
||
#if (defined(linux) && !defined(__uClinux__)) || defined(__NetBSD__)
|
||
- /* Pc-relative indirect. */
|
||
+ /* Pc-relative indirect. Propagate the bottom 2 bits, which can
|
||
+ contain referenceness information in gnu unwinding tables. */
|
||
tmp += ptr;
|
||
- tmp = *(_Unwind_Word *) tmp;
|
||
+ tmp = *(_Unwind_Word *) (tmp & ~(_Unwind_Word)3) | (tmp & 3);
|
||
#elif defined(__symbian__) || defined(__uClinux__)
|
||
/* Absolute pointer. Nothing more to do. */
|
||
#else
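The new pc-relative indirect decoding above keeps the low two bits of the computed address across the load so that the referenceness flags used by gnu unwinding tables survive. A stand-alone C sketch of that step (illustrative only, not part of the patch; a local typedef stands in for the real _Unwind_Word):

typedef unsigned long uw_word;  /* stand-in for _Unwind_Word */

static uw_word
decode_pcrel_indirect (uw_word tmp, uw_word ptr)
{
  tmp += ptr;                     /* form the absolute address */
  /* Dereference with the two low flag bits masked off, then OR them back
     into the result so the referenceness information is preserved.  */
  return *(uw_word *) (tmp & ~(uw_word) 3) | (tmp & 3);
}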
|
||
--- a/gcc/config/arm/vec-common.md
|
||
+++ b/gcc/config/arm/vec-common.md
|
||
@@ -38,6 +38,11 @@
|
||
"TARGET_NEON
|
||
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
|
||
{
|
||
+ if (can_create_pseudo_p ())
|
||
+ {
|
||
+ if (GET_CODE (operands[0]) != REG)
|
||
+ operands[1] = force_reg (<MODE>mode, operands[1]);
|
||
+ }
|
||
})
|
||
|
||
;; Vector arithmetic. Expanders are blank, then unnamed insns implement
|
||
--- a/gcc/config/arm/vfp.md
|
||
+++ b/gcc/config/arm/vfp.md
|
||
@@ -51,7 +51,7 @@
|
||
;; problems because small constants get converted into adds.
|
||
(define_insn "*arm_movsi_vfp"
|
||
[(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv")
|
||
- (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk,r,*t,*t,*Uvi,*t"))]
|
||
+ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))]
|
||
"TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT
|
||
&& ( s_register_operand (operands[0], SImode)
|
||
|| s_register_operand (operands[1], SImode))"
|
||
@@ -82,13 +82,17 @@
|
||
"
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
|
||
+ (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
|
||
+ (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*")
|
||
(set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*")
|
||
(set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
|
||
)
|
||
|
||
+;; See thumb2.md:thumb2_movsi_insn for an explanation of the split
|
||
+;; high/low register alternatives for loads and stores here.
|
||
(define_insn "*thumb2_movsi_vfp"
|
||
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m,*t,r, *t,*t, *Uv")
|
||
- (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk,r,*t,*t,*Uvi,*t"))]
|
||
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l,*hk,m,*m,*t,r, *t,*t, *Uv")
|
||
+ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk,r,*t,*t,*Uvi,*t"))]
|
||
"TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
|
||
&& ( s_register_operand (operands[0], SImode)
|
||
|| s_register_operand (operands[1], SImode))"
|
||
@@ -102,25 +106,29 @@
|
||
case 3:
|
||
return \"movw%?\\t%0, %1\";
|
||
case 4:
|
||
- return \"ldr%?\\t%0, %1\";
|
||
case 5:
|
||
- return \"str%?\\t%1, %0\";
|
||
+ return \"ldr%?\\t%0, %1\";
|
||
case 6:
|
||
- return \"fmsr%?\\t%0, %1\\t%@ int\";
|
||
case 7:
|
||
- return \"fmrs%?\\t%0, %1\\t%@ int\";
|
||
+ return \"str%?\\t%1, %0\";
|
||
case 8:
|
||
+ return \"fmsr%?\\t%0, %1\\t%@ int\";
|
||
+ case 9:
|
||
+ return \"fmrs%?\\t%0, %1\\t%@ int\";
|
||
+ case 10:
|
||
return \"fcpys%?\\t%0, %1\\t%@ int\";
|
||
- case 9: case 10:
|
||
+ case 11: case 12:
|
||
return output_move_vfp (operands);
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
"
|
||
[(set_attr "predicable" "yes")
|
||
- (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_load,f_store")
|
||
- (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*")
|
||
- (set_attr "neg_pool_range" "*,*,*,*, 0,*,*,*,*,1008,*")]
|
||
+ (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_load,f_store")
|
||
+ (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
|
||
+ (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
|
||
+ (set_attr "pool_range" "*,*,*,*,1020,4096,*,*,*,*,*,1020,*")
|
||
+ (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
|
||
)
|
||
|
||
|
||
@@ -145,7 +153,10 @@
|
||
case 4:
|
||
return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
|
||
case 5:
|
||
- return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
|
||
+ if (TARGET_VFP_SINGLE)
|
||
+ return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
|
||
+ else
|
||
+ return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
|
||
case 6: case 7:
|
||
return output_move_vfp (operands);
|
||
default:
|
||
@@ -153,7 +164,14 @@
|
||
}
|
||
"
|
||
[(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
|
||
- (set_attr "length" "8,8,8,4,4,4,4,4")
|
||
+ (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*")
|
||
+ (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
|
||
+ (eq_attr "alternative" "5")
|
||
+ (if_then_else
|
||
+ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1))
|
||
+ (const_int 8)
|
||
+ (const_int 4))]
|
||
+ (const_int 4)))
|
||
(set_attr "pool_range" "*,1020,*,*,*,*,1020,*")
|
||
(set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")]
|
||
)
|
||
@@ -172,7 +190,10 @@
|
||
case 4:
|
||
return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
|
||
case 5:
|
||
- return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
|
||
+ if (TARGET_VFP_SINGLE)
|
||
+ return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
|
||
+ else
|
||
+ return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
|
||
case 6: case 7:
|
||
return output_move_vfp (operands);
|
||
default:
|
||
@@ -180,11 +201,123 @@
|
||
}
|
||
"
|
||
[(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_load,f_store")
|
||
- (set_attr "length" "8,8,8,4,4,4,4,4")
|
||
+ (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*")
|
||
+ (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
|
||
+ (eq_attr "alternative" "5")
|
||
+ (if_then_else
|
||
+ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1))
|
||
+ (const_int 8)
|
||
+ (const_int 4))]
|
||
+ (const_int 4)))
|
||
(set_attr "pool_range" "*,4096,*,*,*,*,1020,*")
|
||
(set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")]
|
||
)
|
||
|
||
+;; HFmode moves
|
||
+(define_insn "*movhf_vfp_neon"
|
||
+ [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r")
|
||
+ (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))]
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16
|
||
+ && ( s_register_operand (operands[0], HFmode)
|
||
+ || s_register_operand (operands[1], HFmode))"
|
||
+ "*
|
||
+ switch (which_alternative)
|
||
+ {
|
||
+ case 0: /* S register from memory */
|
||
+ return \"vld1.16\\t{%z0}, %A1\";
|
||
+ case 1: /* memory from S register */
|
||
+ return \"vst1.16\\t{%z1}, %A0\";
|
||
+ case 2: /* ARM register from memory */
|
||
+ return \"ldrh\\t%0, %1\\t%@ __fp16\";
|
||
+ case 3: /* memory from ARM register */
|
||
+ return \"strh\\t%1, %0\\t%@ __fp16\";
|
||
+ case 4: /* S register from S register */
|
||
+ return \"fcpys\\t%0, %1\";
|
||
+ case 5: /* ARM register from ARM register */
|
||
+ return \"mov\\t%0, %1\\t%@ __fp16\";
|
||
+ case 6: /* S register from ARM register */
|
||
+ return \"fmsr\\t%0, %1\";
|
||
+ case 7: /* ARM register from S register */
|
||
+ return \"fmrs\\t%0, %1\";
|
||
+ case 8: /* ARM register from constant */
|
||
+ {
|
||
+ REAL_VALUE_TYPE r;
|
||
+ long bits;
|
||
+ rtx ops[4];
|
||
+
|
||
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
|
||
+ bits = real_to_target (NULL, &r, HFmode);
|
||
+ ops[0] = operands[0];
|
||
+ ops[1] = GEN_INT (bits);
|
||
+ ops[2] = GEN_INT (bits & 0xff00);
|
||
+ ops[3] = GEN_INT (bits & 0x00ff);
|
||
+
|
||
+ if (arm_arch_thumb2)
|
||
+ output_asm_insn (\"movw\\t%0, %1\", ops);
|
||
+ else
|
||
+ output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
|
||
+ return \"\";
|
||
+ }
|
||
+ default:
|
||
+ gcc_unreachable ();
|
||
+ }
|
||
+ "
|
||
+ [(set_attr "conds" "unconditional")
|
||
+ (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*")
|
||
+ (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*")
|
||
+ (set_attr "length" "4,4,4,4,4,4,4,4,8")]
|
||
+)
|
||
+
|
||
+;; FP16 without element load/store instructions.
|
||
+(define_insn "*movhf_vfp"
|
||
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r")
|
||
+ (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))]
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16
|
||
+ && ( s_register_operand (operands[0], HFmode)
|
||
+ || s_register_operand (operands[1], HFmode))"
|
||
+ "*
|
||
+ switch (which_alternative)
|
||
+ {
|
||
+ case 0: /* ARM register from memory */
|
||
+ return \"ldrh\\t%0, %1\\t%@ __fp16\";
|
||
+ case 1: /* memory from ARM register */
|
||
+ return \"strh\\t%1, %0\\t%@ __fp16\";
|
||
+ case 2: /* S register from S register */
|
||
+ return \"fcpys\\t%0, %1\";
|
||
+ case 3: /* ARM register from ARM register */
|
||
+ return \"mov\\t%0, %1\\t%@ __fp16\";
|
||
+ case 4: /* S register from ARM register */
|
||
+ return \"fmsr\\t%0, %1\";
|
||
+ case 5: /* ARM register from S register */
|
||
+ return \"fmrs\\t%0, %1\";
|
||
+ case 6: /* ARM register from constant */
|
||
+ {
|
||
+ REAL_VALUE_TYPE r;
|
||
+ long bits;
|
||
+ rtx ops[4];
|
||
+
|
||
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
|
||
+ bits = real_to_target (NULL, &r, HFmode);
|
||
+ ops[0] = operands[0];
|
||
+ ops[1] = GEN_INT (bits);
|
||
+ ops[2] = GEN_INT (bits & 0xff00);
|
||
+ ops[3] = GEN_INT (bits & 0x00ff);
|
||
+
|
||
+ if (arm_arch_thumb2)
|
||
+ output_asm_insn (\"movw\\t%0, %1\", ops);
|
||
+ else
|
||
+ output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
|
||
+ return \"\";
|
||
+ }
|
||
+ default:
|
||
+ gcc_unreachable ();
|
||
+ }
|
||
+ "
|
||
+ [(set_attr "conds" "unconditional")
|
||
+ (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*")
|
||
+ (set_attr "length" "4,4,4,4,4,4,8")]
|
||
+)
|
||
+
|
||
|
||
;; SFmode moves
|
||
;; Disparage the w<->r cases because reloading an invalid address is
|
||
@@ -222,6 +355,8 @@
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type"
|
||
"r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
|
||
+ (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
|
||
+ (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
|
||
(set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
|
||
(set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
|
||
)
|
||
@@ -258,6 +393,8 @@
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type"
|
||
"r_2_f,f_2_r,fconsts,f_load,f_store,load1,store1,fcpys,*")
|
||
+ (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
|
||
+ (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
|
||
(set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
|
||
(set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
|
||
)
|
||
@@ -267,7 +404,7 @@
|
||
|
||
(define_insn "*movdf_vfp"
|
||
[(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
|
||
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))]
|
||
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
|
||
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
|
||
&& ( register_operand (operands[0], DFmode)
|
||
|| register_operand (operands[1], DFmode))"
|
||
@@ -280,13 +417,17 @@
|
||
case 1:
|
||
return \"fmrrd%?\\t%Q0, %R0, %P1\";
|
||
case 2:
|
||
+ gcc_assert (TARGET_VFP_DOUBLE);
|
||
return \"fconstd%?\\t%P0, #%G1\";
|
||
case 3: case 4:
|
||
return output_move_double (operands);
|
||
case 5: case 6:
|
||
return output_move_vfp (operands);
|
||
case 7:
|
||
- return \"fcpyd%?\\t%P0, %P1\";
|
||
+ if (TARGET_VFP_SINGLE)
|
||
+ return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
|
||
+ else
|
||
+ return \"fcpyd%?\\t%P0, %P1\";
|
||
case 8:
|
||
return \"#\";
|
||
default:
|
||
@@ -296,14 +437,21 @@
|
||
"
|
||
[(set_attr "type"
|
||
"r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
|
||
- (set_attr "length" "4,4,4,8,8,4,4,4,8")
|
||
+ (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*")
|
||
+ (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
|
||
+ (eq_attr "alternative" "7")
|
||
+ (if_then_else
|
||
+ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1))
|
||
+ (const_int 8)
|
||
+ (const_int 4))]
|
||
+ (const_int 4)))
|
||
(set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
|
||
(set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")]
|
||
)
|
||
|
||
(define_insn "*thumb2_movdf_vfp"
|
||
[(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
|
||
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))]
|
||
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
|
||
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
"*
|
||
{
|
||
@@ -314,13 +462,17 @@
|
||
case 1:
|
||
return \"fmrrd%?\\t%Q0, %R0, %P1\";
|
||
case 2:
|
||
+ gcc_assert (TARGET_VFP_DOUBLE);
|
||
return \"fconstd%?\\t%P0, #%G1\";
|
||
case 3: case 4: case 8:
|
||
return output_move_double (operands);
|
||
case 5: case 6:
|
||
return output_move_vfp (operands);
|
||
case 7:
|
||
- return \"fcpyd%?\\t%P0, %P1\";
|
||
+ if (TARGET_VFP_SINGLE)
|
||
+ return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
|
||
+ else
|
||
+ return \"fcpyd%?\\t%P0, %P1\";
|
||
default:
|
||
abort ();
|
||
}
|
||
@@ -328,7 +480,14 @@
|
||
"
|
||
[(set_attr "type"
|
||
"r_2_f,f_2_r,fconstd,load2,store2,f_load,f_store,ffarithd,*")
|
||
- (set_attr "length" "4,4,4,8,8,4,4,4,8")
|
||
+ (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*")
|
||
+ (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
|
||
+ (eq_attr "alternative" "7")
|
||
+ (if_then_else
|
||
+ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1))
|
||
+ (const_int 8)
|
||
+ (const_int 4))]
|
||
+ (const_int 4)))
|
||
(set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
|
||
(set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
|
||
)
|
||
@@ -356,7 +515,8 @@
|
||
fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
|
||
[(set_attr "conds" "use")
|
||
(set_attr "length" "4,4,8,4,4,8,4,4,8")
|
||
- (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
|
||
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
|
||
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")]
|
||
)
|
||
|
||
(define_insn "*thumb2_movsfcc_vfp"
|
||
@@ -379,7 +539,8 @@
|
||
ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
|
||
[(set_attr "conds" "use")
|
||
(set_attr "length" "6,6,10,6,6,10,6,6,10")
|
||
- (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
|
||
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
|
||
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")]
|
||
)
|
||
|
||
(define_insn "*movdfcc_vfp"
|
||
@@ -389,7 +550,7 @@
|
||
[(match_operand 4 "cc_register" "") (const_int 0)])
|
||
(match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
|
||
(match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
|
||
- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"@
|
||
fcpyd%D3\\t%P0, %P2
|
||
fcpyd%d3\\t%P0, %P1
|
||
@@ -402,7 +563,8 @@
|
||
fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
|
||
[(set_attr "conds" "use")
|
||
(set_attr "length" "4,4,8,4,4,8,4,4,8")
|
||
- (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
|
||
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
|
||
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")]
|
||
)
|
||
|
||
(define_insn "*thumb2_movdfcc_vfp"
|
||
@@ -412,7 +574,7 @@
|
||
[(match_operand 4 "cc_register" "") (const_int 0)])
|
||
(match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
|
||
(match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
|
||
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"@
|
||
it\\t%D3\;fcpyd%D3\\t%P0, %P2
|
||
it\\t%d3\;fcpyd%d3\\t%P0, %P1
|
||
@@ -425,7 +587,8 @@
|
||
ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
|
||
[(set_attr "conds" "use")
|
||
(set_attr "length" "6,6,10,6,6,10,6,6,10")
|
||
- (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
|
||
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
|
||
+ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")]
|
||
)
|
||
|
||
|
||
@@ -443,7 +606,7 @@
|
||
(define_insn "*absdf2_vfp"
|
||
[(set (match_operand:DF 0 "s_register_operand" "=w")
|
||
(abs:DF (match_operand:DF 1 "s_register_operand" "w")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"fabsd%?\\t%P0, %P1"
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type" "ffarithd")]
|
||
@@ -463,12 +626,12 @@
|
||
(define_insn_and_split "*negdf2_vfp"
|
||
[(set (match_operand:DF 0 "s_register_operand" "=w,?r,?r")
|
||
(neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"@
|
||
fnegd%?\\t%P0, %P1
|
||
#
|
||
#"
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && reload_completed
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed
|
||
&& arm_general_register_operand (operands[0], DFmode)"
|
||
[(set (match_dup 0) (match_dup 1))]
|
||
"
|
||
@@ -523,7 +686,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "=w")
|
||
(plus:DF (match_operand:DF 1 "s_register_operand" "w")
|
||
(match_operand:DF 2 "s_register_operand" "w")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"faddd%?\\t%P0, %P1, %P2"
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type" "faddd")]
|
||
@@ -544,7 +707,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "=w")
|
||
(minus:DF (match_operand:DF 1 "s_register_operand" "w")
|
||
(match_operand:DF 2 "s_register_operand" "w")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"fsubd%?\\t%P0, %P1, %P2"
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type" "faddd")]
|
||
@@ -567,7 +730,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "+w")
|
||
(div:DF (match_operand:DF 1 "s_register_operand" "w")
|
||
(match_operand:DF 2 "s_register_operand" "w")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"fdivd%?\\t%P0, %P1, %P2"
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type" "fdivd")]
|
||
@@ -590,7 +753,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "+w")
|
||
(mult:DF (match_operand:DF 1 "s_register_operand" "w")
|
||
(match_operand:DF 2 "s_register_operand" "w")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"fmuld%?\\t%P0, %P1, %P2"
|
||
[(set_attr "predicable" "yes")
|
||
(set_attr "type" "fmuld")]
|
||
@@ -611,7 +774,7 @@
|
||
[(set (match_operand:DF 0 "s_register_operand" "+w")
|
||
(mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w"))
|
||
(match_operand:DF 2 "s_register_operand" "w")))]
|
||
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
|
||
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
|
||
"fnmuld%?\\t%P0, %P1, %P2"
|
[(set_attr "predicable" "yes")
(set_attr "type" "fmuld")]
@@ -626,7 +789,8 @@
(plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
(match_operand:SF 3 "s_register_operand" "t"))
(match_operand:SF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP
+ && (!arm_tune_marvell_f || optimize_size)"
"fmacs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacs")]
@@ -637,7 +801,8 @@
(plus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w")
(match_operand:DF 3 "s_register_operand" "w"))
(match_operand:DF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE
+ && (!arm_tune_marvell_f || optimize_size)"
"fmacd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacd")]
@@ -649,7 +814,8 @@
(minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
(match_operand:SF 3 "s_register_operand" "t"))
(match_operand:SF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP
+ && (!arm_tune_marvell_f || optimize_size)"
"fmscs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacs")]
@@ -660,7 +826,8 @@
(minus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w")
(match_operand:DF 3 "s_register_operand" "w"))
(match_operand:DF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE
+ && (!arm_tune_marvell_f || optimize_size)"
"fmscd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacd")]
@@ -672,7 +839,8 @@
(minus:SF (match_operand:SF 1 "s_register_operand" "0")
(mult:SF (match_operand:SF 2 "s_register_operand" "t")
(match_operand:SF 3 "s_register_operand" "t"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP
+ && (!arm_tune_marvell_f || optimize_size)"
"fnmacs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacs")]
@@ -683,7 +851,8 @@
(minus:DF (match_operand:DF 1 "s_register_operand" "0")
(mult:DF (match_operand:DF 2 "s_register_operand" "w")
(match_operand:DF 3 "s_register_operand" "w"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE
+ && (!arm_tune_marvell_f || optimize_size)"
"fnmacd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacd")]
@@ -697,7 +866,8 @@
(neg:SF (match_operand:SF 2 "s_register_operand" "t"))
(match_operand:SF 3 "s_register_operand" "t"))
(match_operand:SF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP
+ && (!arm_tune_marvell_f || optimize_size)"
"fnmscs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacs")]
@@ -709,7 +879,8 @@
(neg:DF (match_operand:DF 2 "s_register_operand" "w"))
(match_operand:DF 3 "s_register_operand" "w"))
(match_operand:DF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE
+ && (!arm_tune_marvell_f || optimize_size)"
"fnmscd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
(set_attr "type" "fmacd")]
@@ -721,7 +892,7 @@
(define_insn "*extendsfdf2_vfp"
[(set (match_operand:DF 0 "s_register_operand" "=w")
(float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fcvtds%?\\t%P0, %1"
[(set_attr "predicable" "yes")
(set_attr "type" "f_cvt")]
@@ -730,12 +901,30 @@
(define_insn "*truncdfsf2_vfp"
[(set (match_operand:SF 0 "s_register_operand" "=t")
(float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fcvtsd%?\\t%0, %P1"
[(set_attr "predicable" "yes")
(set_attr "type" "f_cvt")]
)

+(define_insn "extendhfsf2"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+ "vcvtb%?.f32.f16\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "truncsfhf2"
+ [(set (match_operand:HF 0 "s_register_operand" "=t")
+ (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+ "vcvtb%?.f16.f32\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
(define_insn "*truncsisf2_vfp"
[(set (match_operand:SI 0 "s_register_operand" "=t")
(fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
@@ -748,7 +937,7 @@
(define_insn "*truncsidf2_vfp"
[(set (match_operand:SI 0 "s_register_operand" "=t")
(fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"ftosizd%?\\t%0, %P1"
[(set_attr "predicable" "yes")
(set_attr "type" "f_cvt")]
@@ -767,7 +956,7 @@
(define_insn "fixuns_truncdfsi2"
[(set (match_operand:SI 0 "s_register_operand" "=t")
(unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"ftouizd%?\\t%0, %P1"
[(set_attr "predicable" "yes")
(set_attr "type" "f_cvt")]
@@ -786,7 +975,7 @@
(define_insn "*floatsidf2_vfp"
[(set (match_operand:DF 0 "s_register_operand" "=w")
(float:DF (match_operand:SI 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fsitod%?\\t%P0, %1"
[(set_attr "predicable" "yes")
(set_attr "type" "f_cvt")]
@@ -805,7 +994,7 @@
(define_insn "floatunssidf2"
[(set (match_operand:DF 0 "s_register_operand" "=w")
(unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fuitod%?\\t%P0, %1"
[(set_attr "predicable" "yes")
(set_attr "type" "f_cvt")]
@@ -826,7 +1015,7 @@
(define_insn "*sqrtdf2_vfp"
[(set (match_operand:DF 0 "s_register_operand" "=w")
(sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fsqrtd%?\\t%P0, %P1"
[(set_attr "predicable" "yes")
(set_attr "type" "fdivd")]
@@ -878,9 +1067,9 @@
[(set (reg:CCFP CC_REGNUM)
(compare:CCFP (match_operand:DF 0 "s_register_operand" "w")
(match_operand:DF 1 "vfp_compare_operand" "wG")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"#"
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
[(set (reg:CCFP VFPCC_REGNUM)
(compare:CCFP (match_dup 0)
(match_dup 1)))
@@ -893,9 +1082,9 @@
[(set (reg:CCFPE CC_REGNUM)
(compare:CCFPE (match_operand:DF 0 "s_register_operand" "w")
(match_operand:DF 1 "vfp_compare_operand" "wG")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"#"
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
[(set (reg:CCFPE VFPCC_REGNUM)
(compare:CCFPE (match_dup 0)
(match_dup 1)))
@@ -935,7 +1124,7 @@
[(set (reg:CCFP VFPCC_REGNUM)
(compare:CCFP (match_operand:DF 0 "s_register_operand" "w,w")
(match_operand:DF 1 "vfp_compare_operand" "w,G")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"@
fcmpd%?\\t%P0, %P1
fcmpzd%?\\t%P0"
@@ -947,7 +1136,7 @@
[(set (reg:CCFPE VFPCC_REGNUM)
(compare:CCFPE (match_operand:DF 0 "s_register_operand" "w,w")
(match_operand:DF 1 "vfp_compare_operand" "w,G")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"@
fcmped%?\\t%P0, %P1
fcmpezd%?\\t%P0"
--- /dev/null
|
||
+++ b/gcc/config/i386/atom.md
|
||
@@ -0,0 +1,795 @@
|
||
+;; Atom Scheduling
|
||
+;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||
+;;
|
||
+;; This file is part of GCC.
|
||
+;;
|
||
+;; GCC is free software; you can redistribute it and/or modify
|
||
+;; it under the terms of the GNU General Public License as published by
|
||
+;; the Free Software Foundation; either version 3, or (at your option)
|
||
+;; any later version.
|
||
+;;
|
||
+;; GCC is distributed in the hope that it will be useful,
|
||
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+;; GNU General Public License for more details.
|
||
+;;
|
||
+;; You should have received a copy of the GNU General Public License
|
||
+;; along with GCC; see the file COPYING3. If not see
|
||
+;; <http://www.gnu.org/licenses/>.
|
||
+;;
|
||
+;; Atom is an in-order core with two integer pipelines.
|
||
+
|
||
+
|
||
+(define_attr "atom_unit" "sishuf,simul,jeu,complex,other"
|
||
+ (const_string "other"))
|
||
+
|
||
+(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
|
||
+ (const_string "other"))
|
||
+
|
||
+(define_automaton "atom")
|
||
+
|
||
+;; Atom has two ports: port 0 and port 1 connecting to all execution units
|
||
+(define_cpu_unit "atom-port-0,atom-port-1" "atom")
|
||
+
|
||
+;; EU: Execution Unit
|
||
+;; Atom EUs are connected by port 0 or port 1.
|
||
+
|
||
+(define_cpu_unit "atom-eu-0, atom-eu-1,
|
||
+ atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
|
||
+ "atom")
|
||
+
|
||
+;; Some EUs have duplicated copies and can be accessed via either
|
||
+;; port 0 or port 1
|
||
+;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
|
||
+
|
||
+;;; Some instructions use dual-pipe execution and need both ports
|
||
+;;; Complex multi-op macro-instructions need both ports and all EUs
|
||
+(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
|
||
+(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 +
|
||
+ atom-imul-1 + atom-imul-2 + atom-imul-3 +
|
||
+ atom-imul-4)")
|
||
+
|
||
+;;; Most of simple instructions have 1 cycle latency. Some of them
|
||
+;;; issue in port 0, some in port 1 and some in either port.
|
||
+(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
|
||
+(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
|
||
+(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
|
||
+
|
||
+;;; Some insn issues in port 0 with 3 cycle latency and 1 cycle tput
|
||
+(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
|
||
+
|
||
+;;; fmul insn can have 4 or 5 cycles latency
|
||
+(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
|
||
+(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
|
||
+
|
||
+;;; fadd can have 5 cycles latency depending on instruction forms
|
||
+(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
|
||
+
|
||
+;;; imul insn has 5 cycles latency
|
||
+(define_reservation "atom-imul-32"
|
||
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4,
|
||
+ atom-port-0")
|
||
+;;; imul instruction excludes other non-FP instructions.
|
||
+(exclusion_set "atom-eu-0, atom-eu-1"
|
||
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
|
||
+
|
||
+;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on
|
||
+;;; instruction forms
|
||
+(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
|
||
+(define_reservation "atom-dual-2c"
|
||
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
|
||
+(define_reservation "atom-dual-5c"
|
||
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
|
||
+
|
||
+;;; Complex macro-instruction has variants of latency, and uses both ports.
|
||
+(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
|
||
+
|
||
+(define_insn_reservation "atom_other" 9
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "other")
|
||
+ (eq_attr "atom_unit" "!jeu")))
|
||
+ "atom-complex, atom-all-eu*8")
|
||
+
|
||
+;; return has type "other" with atom_unit "jeu"
|
||
+(define_insn_reservation "atom_other_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "other")
|
||
+ (eq_attr "atom_unit" "jeu")))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+(define_insn_reservation "atom_multi" 9
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "multi"))
|
||
+ "atom-complex, atom-all-eu*8")
|
||
+
|
||
+;; Normal alu insns without carry
|
||
+(define_insn_reservation "atom_alu" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (eq_attr "use_carry" "0"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Normal alu insns without carry
|
||
+(define_insn_reservation "atom_alu_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (eq_attr "use_carry" "0"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Alu insn consuming CF, such as add/sbb
|
||
+(define_insn_reservation "atom_alu_carry" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (eq_attr "use_carry" "1"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Alu insn consuming CF, such as add/sbb
|
||
+(define_insn_reservation "atom_alu_carry_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (eq_attr "use_carry" "1"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_alu1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_alu1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_negnot" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "negnot")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_negnot_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "negnot")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_imov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imov")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_imov_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imov")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 16<-16, 32<-32
|
||
+(define_insn_reservation "atom_imovx" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (ior (and (match_operand:HI 0 "register_operand")
|
||
+ (match_operand:HI 1 "general_operand"))
|
||
+ (and (match_operand:SI 0 "register_operand")
|
||
+ (match_operand:SI 1 "general_operand"))))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 16<-16, 32<-32, mem
|
||
+(define_insn_reservation "atom_imovx_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (ior (and (match_operand:HI 0 "register_operand")
|
||
+ (match_operand:HI 1 "general_operand"))
|
||
+ (and (match_operand:SI 0 "register_operand")
|
||
+ (match_operand:SI 1 "general_operand"))))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
|
||
+(define_insn_reservation "atom_imovx_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (ior (match_operand:QI 0 "register_operand")
|
||
+ (ior (and (match_operand:SI 0 "register_operand")
|
||
+ (not (match_operand:SI 1 "general_operand")))
|
||
+ (match_operand:DI 0 "register_operand"))))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
|
||
+(define_insn_reservation "atom_imovx_2_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (ior (match_operand:QI 0 "register_operand")
|
||
+ (ior (and (match_operand:SI 0 "register_operand")
|
||
+ (not (match_operand:SI 1 "general_operand")))
|
||
+ (match_operand:DI 0 "register_operand"))))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; 16<-8
|
||
+(define_insn_reservation "atom_imovx_3" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (match_operand:HI 0 "register_operand")
|
||
+ (match_operand:QI 1 "general_operand"))))
|
||
+ "atom-complex, atom-all-eu*2")
|
||
+
|
||
+(define_insn_reservation "atom_lea" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "lea")
|
||
+ (eq_attr "mode" "!HI")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; lea 16bit address is complex insn
|
||
+(define_insn_reservation "atom_lea_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "lea")
|
||
+ (eq_attr "mode" "HI")))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_incdec" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "incdec")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_incdec_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "incdec")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; simple shift instruction use SHIFT eu, none memory
|
||
+(define_insn_reservation "atom_ishift" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift")
|
||
+ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; simple shift instruction use SHIFT eu, memory
|
||
+(define_insn_reservation "atom_ishift_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift")
|
||
+ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles
|
||
+(define_insn_reservation "atom_ishift_3" 7
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift")
|
||
+ (eq_attr "prefix_0f" "1")))
|
||
+ "atom-complex, atom-all-eu*6")
|
||
+
|
||
+(define_insn_reservation "atom_ishift1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_ishift1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_imul" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imul")
|
||
+ (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
|
||
+ "atom-imul-32")
|
||
+
|
||
+(define_insn_reservation "atom_imul_mem" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imul")
|
||
+ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
|
||
+ "atom-imul-32")
|
||
+
|
||
+;; latency set to 10 as common 64x64 imul
|
||
+(define_insn_reservation "atom_imul_3" 10
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imul")
|
||
+ (eq_attr "mode" "!SI")))
|
||
+ "atom-complex, atom-all-eu*9")
|
||
+
|
||
+(define_insn_reservation "atom_idiv" 65
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "idiv"))
|
||
+ "atom-complex, atom-all-eu*32, nothing*32")
|
||
+
|
||
+(define_insn_reservation "atom_icmp" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmp")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_icmp_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmp")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_test" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "test")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_test_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "test")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_ibr" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ibr")
|
||
+ (eq_attr "memory" "!load")))
|
||
+ "atom-simple-1")
|
||
+
|
||
+;; complex if jump target is from address
|
||
+(define_insn_reservation "atom_ibr_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ibr")
|
||
+ (eq_attr "memory" "load")))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_setcc" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "setcc")
|
||
+ (eq_attr "memory" "!store")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 2 cycles complex if target is in memory
|
||
+(define_insn_reservation "atom_setcc_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "setcc")
|
||
+ (eq_attr "memory" "store")))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_icmov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmov")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_icmov_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmov")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_push" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "push"))
|
||
+ "atom-dual-2c")
|
||
+
|
||
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_pop" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "pop")
|
||
+ (eq_attr "mode" "DI")))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_pop_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "pop")
|
||
+ (eq_attr "mode" "!DI")))
|
||
+ "atom-dual-2c")
|
||
+
|
||
+;; UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_call" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "call"))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+(define_insn_reservation "atom_callv" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "callv"))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+(define_insn_reservation "atom_leave" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "leave"))
|
||
+ "atom-complex, atom-all-eu*2")
|
||
+
|
||
+(define_insn_reservation "atom_str" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "str"))
|
||
+ "atom-complex, atom-all-eu*2")
|
||
+
|
||
+(define_insn_reservation "atom_sselog" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_sselog_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_sselog1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_sselog1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; not pmad, not psad
|
||
+(define_insn_reservation "atom_sseiadd" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (and (not (match_operand:V2DI 0 "register_operand"))
|
||
+ (and (eq_attr "atom_unit" "!simul")
|
||
+ (eq_attr "atom_unit" "!complex")))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; pmad, psad and 64
|
||
+(define_insn_reservation "atom_sseiadd_2" 4
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (and (not (match_operand:V2DI 0 "register_operand"))
|
||
+ (and (eq_attr "atom_unit" "simul" )
|
||
+ (eq_attr "mode" "DI")))))
|
||
+ "atom-fmul-4c")
|
||
+
|
||
+;; pmad, psad and 128
|
||
+(define_insn_reservation "atom_sseiadd_3" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (and (not (match_operand:V2DI 0 "register_operand"))
|
||
+ (and (eq_attr "atom_unit" "simul" )
|
||
+ (eq_attr "mode" "TI")))))
|
||
+ "atom-fmul-5c")
|
||
+
|
||
+;; if paddq(64 bit op), phadd/phsub
|
||
+(define_insn_reservation "atom_sseiadd_4" 6
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (ior (match_operand:V2DI 0 "register_operand")
|
||
+ (eq_attr "atom_unit" "complex"))))
|
||
+ "atom-complex, atom-all-eu*5")
|
||
+
|
||
+;; if immediate op.
|
||
+(define_insn_reservation "atom_sseishft" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseishft")
|
||
+ (and (eq_attr "atom_unit" "!sishuf")
|
||
+ (match_operand 2 "immediate_operand"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; if palignr or psrldq
|
||
+(define_insn_reservation "atom_sseishft_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseishft")
|
||
+ (and (eq_attr "atom_unit" "sishuf")
|
||
+ (match_operand 2 "immediate_operand"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; if reg/mem op
|
||
+(define_insn_reservation "atom_sseishft_3" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseishft")
|
||
+ (not (match_operand 2 "immediate_operand"))))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_sseimul" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "sseimul"))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; rcpss or rsqrtss
|
||
+(define_insn_reservation "atom_sse" 4
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
|
||
+ "atom-fmul-4c")
|
||
+
|
||
+;; movshdup, movsldup. Suggest to type sseishft
|
||
+(define_insn_reservation "atom_sse_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (eq_attr "atom_sse_attr" "movdup")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; lfence
|
||
+(define_insn_reservation "atom_sse_3" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (eq_attr "atom_sse_attr" "lfence")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; sfence,clflush,mfence, prefetch
|
||
+(define_insn_reservation "atom_sse_4" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (ior (eq_attr "atom_sse_attr" "fence")
|
||
+ (eq_attr "atom_sse_attr" "prefetch"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; rcpps, rsqrtss, sqrt, ldmxcsr
|
||
+(define_insn_reservation "atom_sse_5" 7
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (ior (ior (eq_attr "atom_sse_attr" "sqrt")
|
||
+ (eq_attr "atom_sse_attr" "mxcsr"))
|
||
+ (and (eq_attr "atom_sse_attr" "rcp")
|
||
+ (eq_attr "mode" "V4SF")))))
|
||
+ "atom-complex, atom-all-eu*6")
|
||
+
|
||
+;; xmm->xmm
|
||
+(define_insn_reservation "atom_ssemov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; reg->xmm
|
||
+(define_insn_reservation "atom_ssemov_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; xmm->reg
|
||
+(define_insn_reservation "atom_ssemov_3" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
|
||
+ "atom-eu-0-3-1")
|
||
+
|
||
+;; mov mem
|
||
+(define_insn_reservation "atom_ssemov_4" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; movu mem
|
||
+(define_insn_reservation "atom_ssemov_5" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+;; no memory simple
|
||
+(define_insn_reservation "atom_sseadd" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseadd")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (and (eq_attr "mode" "!V2DF")
|
||
+ (eq_attr "atom_unit" "!complex")))))
|
||
+ "atom-fadd-5c")
|
||
+
|
||
+;; memory simple
|
||
+(define_insn_reservation "atom_sseadd_mem" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseadd")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (and (eq_attr "mode" "!V2DF")
|
||
+ (eq_attr "atom_unit" "!complex")))))
|
||
+ "atom-dual-5c")
|
||
+
|
||
+;; maxps, minps, *pd, hadd, hsub
|
||
+(define_insn_reservation "atom_sseadd_3" 8
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseadd")
|
||
+ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
|
||
+ "atom-complex, atom-all-eu*7")
|
||
+
|
||
+;; Except dppd/dpps
|
||
+(define_insn_reservation "atom_ssemul" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemul")
|
||
+ (eq_attr "mode" "!SF")))
|
||
+ "atom-fmul-5c")
|
||
+
|
||
+;; Except dppd/dpps, 4 cycle if mulss
|
||
+(define_insn_reservation "atom_ssemul_2" 4
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemul")
|
||
+ (eq_attr "mode" "SF")))
|
||
+ "atom-fmul-4c")
|
||
+
|
||
+(define_insn_reservation "atom_ssecmp" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "ssecmp"))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_ssecomi" 10
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "ssecomi"))
|
||
+ "atom-complex, atom-all-eu*9")
|
||
+
|
||
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
|
||
+(define_insn_reservation "atom_ssecvt" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssecvt")
|
||
+ (ior (and (match_operand:V2SI 0 "register_operand")
|
||
+ (match_operand:V4SF 1 "register_operand"))
|
||
+ (and (match_operand:V4SF 0 "register_operand")
|
||
+ (match_operand:V2SI 1 "register_operand")))))
|
||
+ "atom-fadd-5c")
|
||
+
|
||
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
|
||
+(define_insn_reservation "atom_ssecvt_2" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssecvt")
|
||
+ (ior (and (match_operand:V2SI 0 "register_operand")
|
||
+ (match_operand:V4SF 1 "memory_operand"))
|
||
+ (and (match_operand:V4SF 0 "register_operand")
|
||
+ (match_operand:V2SI 1 "memory_operand")))))
|
||
+ "atom-dual-5c")
|
||
+
|
||
+;; otherwise. 7 cycles average for cvtss2sd
|
||
+(define_insn_reservation "atom_ssecvt_3" 7
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssecvt")
|
||
+ (not (ior (and (match_operand:V2SI 0 "register_operand")
|
||
+ (match_operand:V4SF 1 "nonimmediate_operand"))
|
||
+ (and (match_operand:V4SF 0 "register_operand")
|
||
+ (match_operand:V2SI 1 "nonimmediate_operand"))))))
|
||
+ "atom-complex, atom-all-eu*6")
|
||
+
|
||
+;; memory and cvtsi2sd
|
||
+(define_insn_reservation "atom_sseicvt" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseicvt")
|
||
+ (and (match_operand:V2DF 0 "register_operand")
|
||
+ (match_operand:SI 1 "memory_operand"))))
|
||
+ "atom-dual-5c")
|
||
+
|
||
+;; otherwise. 8 cycles average for cvtsd2si
|
||
+(define_insn_reservation "atom_sseicvt_2" 8
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseicvt")
|
||
+ (not (and (match_operand:V2DF 0 "register_operand")
|
||
+ (match_operand:SI 1 "memory_operand")))))
|
||
+ "atom-complex, atom-all-eu*7")
|
||
+
|
||
+(define_insn_reservation "atom_ssediv" 62
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "ssediv"))
|
||
+ "atom-complex, atom-all-eu*12, nothing*49")
|
||
+
|
||
+;; simple for fmov
|
||
+(define_insn_reservation "atom_fmov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "fmov")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; simple for fmov
|
||
+(define_insn_reservation "atom_fmov_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "fmov")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Define bypass here
|
||
+
|
||
+;; There will be no stall from lea to non-mem EX insns
|
||
+(define_bypass 0 "atom_lea"
|
||
+ "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec, atom_setcc, atom_icmov, atom_pop")
|
||
+
|
||
+(define_bypass 0 "atom_lea"
|
||
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "!ix86_agi_dependent")
|
||
+
|
||
+;; There will be 3 cycles stall from EX insns to AGAN insns LEA
|
||
+(define_bypass 4 "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
|
||
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
|
||
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "atom_lea")
|
||
+
|
||
+;; There will be 3 cycles stall from EX insns to insns need addr calculation
|
||
+(define_bypass 4 "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
|
||
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imul_mem, atom_icmp_mem,
|
||
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
|
||
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
|
||
+ atom_ishift_mem, atom_ishift1_mem,
|
||
+ atom_rotate_mem, atom_rotate1_mem"
|
||
+ "ix86_agi_dependent")
|
||
+
|
||
+;; Stall from imul to lea is 8 cycles.
|
||
+(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
|
||
+
|
||
+;; Stall from imul to memory address is 8 cycles.
|
||
+(define_bypass 9 "atom_imul, atom_imul_mem"
|
||
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
|
||
+ atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
|
||
+ atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
|
||
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
|
||
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
|
||
+ "ix86_agi_dependent")
|
||
+
|
||
+;; There will be 0 cycle stall from cmp/test to jcc
|
||
+
|
||
+;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
|
||
+(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
|
||
+ atom_alu1, atom_negnot, atom_incdec, atom_ishift,
|
||
+ atom_ishift1, atom_rotate, atom_rotate1"
|
||
+ "atom_icmov, atom_alu_carry")
|
||
+
|
||
+;; lea to shift count stall is 2 cycles
|
||
+(define_bypass 3 "atom_lea"
|
||
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
|
||
+ atom_ishift_mem, atom_ishift1_mem,
|
||
+ atom_rotate_mem, atom_rotate1_mem"
|
||
+ "ix86_dep_by_shift_count")
|
||
+
|
||
+;; lea to shift source stall is 1 cycle
|
||
+(define_bypass 2 "atom_lea"
|
||
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
|
||
+ "!ix86_dep_by_shift_count")
|
||
+
|
||
+;; non-lea to shift count stall is 1 cycle
|
||
+(define_bypass 2 "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
|
||
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
|
||
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
|
||
+ atom_ishift_mem, atom_ishift1_mem,
|
||
+ atom_rotate_mem, atom_rotate1_mem"
|
||
+ "ix86_dep_by_shift_count")
|
||
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -29,6 +29,7 @@
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
+#define bit_MOVBE (1 << 22)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
#define bit_XSAVE (1 << 26)
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -34,7 +34,7 @@
#endif

#undef TARGET_64BIT_MS_ABI
-#define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
+#define TARGET_64BIT_MS_ABI (!cfun ? ix86_abi == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)

#undef DEFAULT_ABI
#define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI)
@@ -203,7 +203,7 @@
#define CHECK_STACK_LIMIT 4000

#undef STACK_BOUNDARY
-#define STACK_BOUNDARY (DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
+#define STACK_BOUNDARY (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)

/* By default, target has a 80387, uses IEEE compatible arithmetic,
returns float values in the 387 and needs stack probes.
--- a/gcc/config/i386/cygming.opt
+++ b/gcc/config/i386/cygming.opt
@@ -45,3 +45,7 @@
mwindows
Target
Create GUI application
+
+mpe-aligned-commons
+Target Var(use_pe_aligned_common) Init(HAVE_GAS_ALIGNED_COMM)
+Use the GNU extension to the PE format for aligned common data
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -378,7 +378,7 @@
/* Extended features */
unsigned int has_lahf_lm = 0, has_sse4a = 0;
unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
- unsigned int has_sse4_1 = 0, has_sse4_2 = 0;
+ unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
unsigned int has_pclmul = 0;

@@ -398,9 +398,22 @@

__cpuid (1, eax, ebx, ecx, edx);

- /* We don't care for extended family. */
model = (eax >> 4) & 0x0f;
family = (eax >> 8) & 0x0f;
+ if (vendor == SIG_INTEL)
+ {
+ unsigned int extended_model, extended_family;
+
+ extended_model = (eax >> 12) & 0xf0;
+ extended_family = (eax >> 20) & 0xff;
+ if (family == 0x0f)
+ {
+ family += extended_family;
+ model += extended_model;
+ }
+ else if (family == 0x06)
+ model += extended_model;
+ }

has_sse3 = ecx & bit_SSE3;
has_ssse3 = ecx & bit_SSSE3;
@@ -408,6 +421,7 @@
has_sse4_2 = ecx & bit_SSE4_2;
has_avx = ecx & bit_AVX;
has_cmpxchg16b = ecx & bit_CMPXCHG16B;
+ has_movbe = ecx & bit_MOVBE;
has_popcnt = ecx & bit_POPCNT;
has_aes = ecx & bit_AES;
has_pclmul = ecx & bit_PCLMUL;
@@ -505,8 +519,8 @@
break;
case PROCESSOR_PENTIUMPRO:
if (has_longmode)
- /* It is Core 2 Duo. */
- cpu = "core2";
+ /* It is Core 2 or Atom. */
+ cpu = (model == 28) ? "atom" : "core2";
else if (arch)
{
if (has_sse3)
@@ -597,6 +611,8 @@
options = concat (options, "-mcx16 ", NULL);
if (has_lahf_lm)
options = concat (options, "-msahf ", NULL);
+ if (has_movbe)
+ options = concat (options, "-mmovbe ", NULL);
if (has_aes)
options = concat (options, "-maes ", NULL);
if (has_pclmul)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1036,6 +1036,79 @@
1, /* cond_not_taken_branch_cost. */
};

+static const
+struct processor_costs atom_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {15, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{24, loop}, {32, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
/* Generic64 should produce code tuned for Nocona and K8. */
|
||
static const
|
||
struct processor_costs generic64_cost = {
|
||
@@ -1194,6 +1267,7 @@
|
||
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||
#define m_CORE2 (1<<PROCESSOR_CORE2)
|
||
+#define m_ATOM (1<<PROCESSOR_ATOM)
|
||
|
||
#define m_GEODE (1<<PROCESSOR_GEODE)
|
||
#define m_K6 (1<<PROCESSOR_K6)
|
||
@@ -1231,10 +1305,11 @@
|
||
m_486 | m_PENT,
|
||
|
||
/* X86_TUNE_UNROLL_STRLEN */
|
||
- m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
|
||
+ m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
|
||
+ | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_DEEP_BRANCH_PREDICTION */
|
||
- m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
|
||
+ m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
|
||
|
||
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
|
||
on simulation result. But after P4 was made, no performance benefit
|
||
@@ -1246,12 +1321,12 @@
|
||
~m_386,
|
||
|
||
/* X86_TUNE_USE_SAHF */
|
||
- m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
|
||
+ m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
|
||
| m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
|
||
partial dependencies. */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
|
||
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
|
||
|
||
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
|
||
@@ -1271,13 +1346,13 @@
|
||
m_386 | m_486 | m_K6_GEODE,
|
||
|
||
/* X86_TUNE_USE_SIMODE_FIOP */
|
||
- ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
|
||
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
|
||
|
||
/* X86_TUNE_USE_MOV0 */
|
||
m_K6,
|
||
|
||
/* X86_TUNE_USE_CLTD */
|
||
- ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
|
||
+ ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
|
||
|
||
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
|
||
m_PENT4,
|
||
@@ -1292,8 +1367,8 @@
|
||
~(m_PENT | m_PPRO),
|
||
|
||
/* X86_TUNE_PROMOTE_QIMODE */
|
||
- m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
|
||
- | m_GENERIC /* | m_PENT4 ? */,
|
||
+ m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
|
||
+ | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
|
||
|
||
/* X86_TUNE_FAST_PREFIX */
|
||
~(m_PENT | m_486 | m_386),
|
||
@@ -1317,26 +1392,28 @@
|
||
m_PPRO,
|
||
|
||
/* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
|
||
- m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
|
||
+ | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_ADD_ESP_8 */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
|
||
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SUB_ESP_4 */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
|
||
+ | m_GENERIC,
|
||
|
||
/* X86_TUNE_SUB_ESP_8 */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
|
||
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
|
||
for DFmode copies */
|
||
- ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
||
+ ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
||
| m_GENERIC | m_GEODE),
|
||
|
||
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
|
||
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
|
||
conflict here in between PPro/Pentium4 based chips that thread 128bit
|
||
@@ -1347,7 +1424,8 @@
|
||
shows that disabling this option on P4 brings over 20% SPECfp regression,
|
||
while enabling it on K8 brings roughly 2.4% regression that can be partly
|
||
masked by careful scheduling of moves. */
|
||
- m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
|
||
+ m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
|
||
+ | m_AMDFAM10,
|
||
|
||
/* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
|
||
m_AMDFAM10,
|
||
@@ -1365,13 +1443,13 @@
|
||
m_PPRO | m_PENT4 | m_NOCONA,
|
||
|
||
/* X86_TUNE_MEMORY_MISMATCH_STALL */
|
||
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_PROLOGUE_USING_MOVE */
|
||
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
|
||
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_EPILOGUE_USING_MOVE */
|
||
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
|
||
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SHIFT1 */
|
||
~m_486,
|
||
@@ -1380,29 +1458,32 @@
|
||
m_AMD_MULTIPLE,
|
||
|
||
/* X86_TUNE_INTER_UNIT_MOVES */
|
||
- ~(m_AMD_MULTIPLE | m_GENERIC),
|
||
+ ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
|
||
|
||
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
|
||
~(m_AMDFAM10),
|
||
|
||
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
|
||
than 4 branch instructions in the 16 byte window. */
|
||
- m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
|
||
+ | m_GENERIC,
|
||
|
||
/* X86_TUNE_SCHEDULE */
|
||
- m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
|
||
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
|
||
+ | m_GENERIC,
|
||
|
||
/* X86_TUNE_USE_BT */
|
||
- m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_USE_INCDEC */
|
||
- ~(m_PENT4 | m_NOCONA | m_GENERIC),
|
||
+ ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
|
||
|
||
/* X86_TUNE_PAD_RETURNS */
|
||
m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_EXT_80387_CONSTANTS */
|
||
- m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
|
||
+ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
|
||
+ | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SHORTEN_X87_SSE */
|
||
~m_K8,
|
||
@@ -1447,6 +1528,10 @@
|
||
with a subsequent conditional jump instruction into a single
|
||
compare-and-branch uop. */
|
||
m_CORE2,
|
||
+
|
||
+ /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
|
||
+ will impact LEA instruction selection. */
|
||
+ m_ATOM,
|
||
};
|
||
|
||
/* Feature tests against the various architecture variations. */
|
||
@@ -1472,10 +1557,11 @@
|
||
};
|
||
|
||
static const unsigned int x86_accumulate_outgoing_args
|
||
- = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
|
||
+ = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
||
+ | m_GENERIC;
|
||
|
||
static const unsigned int x86_arch_always_fancy_math_387
|
||
- = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
|
||
+ = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
|
||
| m_NOCONA | m_CORE2 | m_GENERIC;
|
||
|
||
static enum stringop_alg stringop_alg = no_stringop;
|
||
@@ -1743,6 +1829,9 @@
|
||
/* Alignment for incoming stack boundary in bits. */
|
||
unsigned int ix86_incoming_stack_boundary;
|
||
|
||
+/* The abi used by target. */
|
||
+enum calling_abi ix86_abi;
|
||
+
|
||
/* Values 1-5: see jump.c */
|
||
int ix86_branch_cost;
|
||
|
||
@@ -1819,6 +1908,8 @@
|
||
static bool ix86_can_inline_p (tree, tree);
|
||
static void ix86_set_current_function (tree);
|
||
|
||
+static enum calling_abi ix86_function_abi (const_tree);
|
||
+
|
||
|
||
/* The svr4 ABI for the i386 says that records and unions are returned
|
||
in memory. */
|
||
@@ -1877,9 +1968,11 @@
|
||
|
||
#define OPTION_MASK_ISA_ABM_SET \
|
||
(OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
|
||
+
|
||
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
|
||
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
|
||
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
|
||
+#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
|
||
|
||
/* Define a set of ISAs which aren't available when a given ISA is
|
||
disabled. MMX and SSE ISAs are handled separately. */
|
||
@@ -1921,6 +2014,7 @@
|
||
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
|
||
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
|
||
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
|
||
+#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
|
||
|
||
/* Vectorization library interface and handlers. */
|
||
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
|
||
@@ -1953,7 +2047,8 @@
|
||
{&core2_cost, 16, 10, 16, 10, 16},
|
||
{&generic32_cost, 16, 7, 16, 7, 16},
|
||
{&generic64_cost, 16, 10, 16, 10, 16},
|
||
- {&amdfam10_cost, 32, 24, 32, 7, 32}
|
||
+ {&amdfam10_cost, 32, 24, 32, 7, 32},
|
||
+ {&atom_cost, 16, 7, 16, 7, 16}
|
||
};
|
||
|
||
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
|
||
@@ -1971,6 +2066,7 @@
|
||
"prescott",
|
||
"nocona",
|
||
"core2",
|
||
+ "atom",
|
||
"geode",
|
||
"k6",
|
||
"k6-2",
|
||
@@ -2209,6 +2305,19 @@
|
||
}
|
||
return true;
|
||
|
||
+ case OPT_mmovbe:
|
||
+ if (value)
|
||
+ {
|
||
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
|
||
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
|
||
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
|
||
+ }
|
||
+ return true;
|
||
+
|
||
case OPT_maes:
|
||
if (value)
|
||
{
|
||
@@ -2271,6 +2380,7 @@
|
||
{ "-mmmx", OPTION_MASK_ISA_MMX },
|
||
{ "-mabm", OPTION_MASK_ISA_ABM },
|
||
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
|
||
+ { "-mmovbe", OPTION_MASK_ISA_MOVBE },
|
||
{ "-maes", OPTION_MASK_ISA_AES },
|
||
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
|
||
};
|
||
@@ -2487,7 +2597,8 @@
|
||
PTA_AES = 1 << 17,
|
||
PTA_PCLMUL = 1 << 18,
|
||
PTA_AVX = 1 << 19,
|
||
- PTA_FMA = 1 << 20
|
||
+ PTA_FMA = 1 << 20,
|
||
+ PTA_MOVBE = 1 << 21
|
||
};
|
||
|
||
static struct pta
|
||
@@ -2529,6 +2640,9 @@
|
||
{"core2", PROCESSOR_CORE2, CPU_CORE2,
|
||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||
| PTA_SSSE3 | PTA_CX16},
|
||
+ {"atom", PROCESSOR_ATOM, CPU_ATOM,
|
||
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||
+ | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
|
||
{"geode", PROCESSOR_GEODE, CPU_GEODE,
|
||
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
|
||
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
|
||
@@ -2716,6 +2830,20 @@
|
||
error ("bad value (%s) for %sarch=%s %s",
|
||
ix86_arch_string, prefix, suffix, sw);
|
||
|
||
+ /* Validate -mabi= value. */
|
||
+ if (ix86_abi_string)
|
||
+ {
|
||
+ if (strcmp (ix86_abi_string, "sysv") == 0)
|
||
+ ix86_abi = SYSV_ABI;
|
||
+ else if (strcmp (ix86_abi_string, "ms") == 0)
|
||
+ ix86_abi = MS_ABI;
|
||
+ else
|
||
+ error ("unknown ABI (%s) for %sabi=%s %s",
|
||
+ ix86_abi_string, prefix, suffix, sw);
|
||
+ }
|
||
+ else
|
||
+ ix86_abi = DEFAULT_ABI;
|
||
+
|
||
if (ix86_cmodel_string != 0)
|
||
{
|
||
if (!strcmp (ix86_cmodel_string, "small"))
|
||
@@ -2828,6 +2956,9 @@
|
||
if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
|
||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
|
||
ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
|
||
+ if (processor_alias_table[i].flags & PTA_MOVBE
|
||
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
|
||
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
|
||
if (processor_alias_table[i].flags & PTA_AES
|
||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
|
||
ix86_isa_flags |= OPTION_MASK_ISA_AES;
|
||
@@ -4592,14 +4723,14 @@
|
||
default ABI. */
|
||
|
||
/* RAX is used as hidden argument to va_arg functions. */
|
||
- if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
|
||
+ if (ix86_abi == SYSV_ABI && regno == AX_REG)
|
||
return true;
|
||
|
||
- if (DEFAULT_ABI == MS_ABI)
|
||
+ if (ix86_abi == MS_ABI)
|
||
parm_regs = x86_64_ms_abi_int_parameter_registers;
|
||
else
|
||
parm_regs = x86_64_int_parameter_registers;
|
||
- for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
|
||
+ for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
|
||
: X86_64_REGPARM_MAX); i++)
|
||
if (regno == parm_regs[i])
|
||
return true;
|
||
@@ -4627,7 +4758,7 @@
|
||
int
|
||
ix86_reg_parm_stack_space (const_tree fndecl)
|
||
{
|
||
- int call_abi = SYSV_ABI;
|
||
+ enum calling_abi call_abi = SYSV_ABI;
|
||
if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
|
||
call_abi = ix86_function_abi (fndecl);
|
||
else
|
||
@@ -4639,37 +4770,39 @@
|
||
|
||
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
|
||
call abi used. */
|
||
-int
|
||
+enum calling_abi
|
||
ix86_function_type_abi (const_tree fntype)
|
||
{
|
||
if (TARGET_64BIT && fntype != NULL)
|
||
{
|
||
- int abi;
|
||
- if (DEFAULT_ABI == SYSV_ABI)
|
||
- abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
|
||
- else
|
||
- abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
|
||
-
|
||
+ enum calling_abi abi = ix86_abi;
|
||
+ if (abi == SYSV_ABI)
|
||
+ {
|
||
+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
|
||
+ abi = MS_ABI;
|
||
+ }
|
||
+ else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
|
||
+ abi = SYSV_ABI;
|
||
return abi;
|
||
}
|
||
- return DEFAULT_ABI;
|
||
+ return ix86_abi;
|
||
}
|
||
|
||
-int
|
||
+static enum calling_abi
|
||
ix86_function_abi (const_tree fndecl)
|
||
{
|
||
if (! fndecl)
|
||
- return DEFAULT_ABI;
|
||
+ return ix86_abi;
|
||
return ix86_function_type_abi (TREE_TYPE (fndecl));
|
||
}
|
||
|
||
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
|
||
call abi used. */
|
||
-int
|
||
+enum calling_abi
|
||
ix86_cfun_abi (void)
|
||
{
|
||
if (! cfun || ! TARGET_64BIT)
|
||
- return DEFAULT_ABI;
|
||
+ return ix86_abi;
|
||
return cfun->machine->call_abi;
|
||
}
|
||
|
||
@@ -4683,7 +4816,7 @@
|
||
ix86_call_abi_override (const_tree fndecl)
|
||
{
|
||
if (fndecl == NULL_TREE)
|
||
- cfun->machine->call_abi = DEFAULT_ABI;
|
||
+ cfun->machine->call_abi = ix86_abi;
|
||
else
|
||
cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
|
||
}
|
||
@@ -4724,8 +4857,8 @@
|
||
cum->nregs = ix86_regparm;
|
||
if (TARGET_64BIT)
|
||
{
|
||
- if (cum->call_abi != DEFAULT_ABI)
|
||
- cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
|
||
+ if (cum->call_abi != ix86_abi)
|
||
+ cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
|
||
: X64_REGPARM_MAX;
|
||
}
|
||
if (TARGET_SSE)
|
||
@@ -4733,8 +4866,8 @@
|
||
cum->sse_nregs = SSE_REGPARM_MAX;
|
||
if (TARGET_64BIT)
|
||
{
|
||
- if (cum->call_abi != DEFAULT_ABI)
|
||
- cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
|
||
+ if (cum->call_abi != ix86_abi)
|
||
+ cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
|
||
: X64_SSE_REGPARM_MAX;
|
||
}
|
||
}
|
||
@@ -5700,7 +5833,7 @@
|
||
if (type)
|
||
mode = type_natural_mode (type, NULL);
|
||
|
||
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
|
||
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
|
||
function_arg_advance_ms_64 (cum, bytes, words);
|
||
else if (TARGET_64BIT)
|
||
function_arg_advance_64 (cum, mode, type, words, named);
|
||
@@ -5846,9 +5979,9 @@
|
||
if (mode == VOIDmode)
|
||
return GEN_INT (cum->maybe_vaarg
|
||
? (cum->sse_nregs < 0
|
||
- ? (cum->call_abi == DEFAULT_ABI
|
||
+ ? (cum->call_abi == ix86_abi
|
||
? SSE_REGPARM_MAX
|
||
- : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
|
||
+ : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
|
||
: X64_SSE_REGPARM_MAX))
|
||
: cum->sse_regno)
|
||
: -1);
|
||
@@ -5942,7 +6075,7 @@
|
||
if (type && TREE_CODE (type) == VECTOR_TYPE)
|
||
mode = type_natural_mode (type, cum);
|
||
|
||
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
|
||
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
|
||
return function_arg_ms_64 (cum, mode, omode, named, bytes);
|
||
else if (TARGET_64BIT)
|
||
return function_arg_64 (cum, mode, omode, type, named);
|
||
@@ -5962,7 +6095,7 @@
|
||
const_tree type, bool named ATTRIBUTE_UNUSED)
|
||
{
|
||
/* See Windows x64 Software Convention. */
|
||
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
|
||
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
|
||
{
|
||
int msize = (int) GET_MODE_SIZE (mode);
|
||
if (type)
|
||
@@ -6102,7 +6235,7 @@
|
||
/* TODO: The function should depend on current function ABI but
|
||
builtins.c would need updating then. Therefore we use the
|
||
default ABI. */
|
||
- if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
|
||
+ if (TARGET_64BIT && ix86_abi == MS_ABI)
|
||
return false;
|
||
return TARGET_FLOAT_RETURNS_IN_80387;
|
||
|
||
@@ -6498,13 +6631,13 @@
|
||
static tree
|
||
ix86_build_builtin_va_list (void)
|
||
{
|
||
- tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
|
||
+ tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
|
||
|
||
/* Initialize abi specific va_list builtin types. */
|
||
if (TARGET_64BIT)
|
||
{
|
||
tree t;
|
||
- if (DEFAULT_ABI == MS_ABI)
|
||
+ if (ix86_abi == MS_ABI)
|
||
{
|
||
t = ix86_build_builtin_va_list_abi (SYSV_ABI);
|
||
if (TREE_CODE (t) != RECORD_TYPE)
|
||
@@ -6518,7 +6651,7 @@
|
||
t = build_variant_type_copy (t);
|
||
sysv_va_list_type_node = t;
|
||
}
|
||
- if (DEFAULT_ABI != MS_ABI)
|
||
+ if (ix86_abi != MS_ABI)
|
||
{
|
||
t = ix86_build_builtin_va_list_abi (MS_ABI);
|
||
if (TREE_CODE (t) != RECORD_TYPE)
|
||
@@ -6551,8 +6684,8 @@
|
||
int i;
|
||
int regparm = ix86_regparm;
|
||
|
||
- if (cum->call_abi != DEFAULT_ABI)
|
||
- regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
|
||
+ if (cum->call_abi != ix86_abi)
|
||
+ regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
|
||
|
||
/* GPR size of varargs save area. */
|
||
if (cfun->va_list_gpr_size)
|
||
@@ -6705,7 +6838,7 @@
|
||
return true;
|
||
canonic = ix86_canonical_va_list_type (type);
|
||
return (canonic == ms_va_list_type_node
|
||
- || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
|
||
+ || (ix86_abi == MS_ABI && canonic == va_list_type_node));
|
||
}
|
||
|
||
/* Implement va_start. */
|
||
@@ -12987,6 +13120,316 @@
|
||
emit_move_insn (operands[0], dst);
|
||
}
|
||
|
||
+#define LEA_SEARCH_THRESHOLD 12
+
+/* Search backward for non-agu definition of register number REGNO1
+   or register number REGNO2 in INSN's basic block until
+   1. Pass LEA_SEARCH_THRESHOLD instructions, or
+   2. Reach BB boundary, or
+   3. Reach agu definition.
+   Returns the distance between the non-agu definition point and INSN.
+   If no definition point is found, returns -1.  */
|
||
+
|
||
+static int
|
||
+distance_non_agu_define (unsigned int regno1, unsigned int regno2,
|
||
+ rtx insn)
|
||
+{
|
||
+ basic_block bb = BLOCK_FOR_INSN (insn);
|
||
+ int distance = 0;
|
||
+ df_ref *def_rec;
|
||
+ enum attr_type insn_type;
|
||
+
|
||
+ if (insn != BB_HEAD (bb))
|
||
+ {
|
||
+ rtx prev = PREV_INSN (insn);
|
||
+ while (prev && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (INSN_P (prev))
|
||
+ {
|
||
+ distance++;
|
||
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
|
||
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||
+ && (regno1 == DF_REF_REGNO (*def_rec)
|
||
+ || regno2 == DF_REF_REGNO (*def_rec)))
|
||
+ {
|
||
+ insn_type = get_attr_type (prev);
|
||
+ if (insn_type != TYPE_LEA)
|
||
+ goto done;
|
||
+ }
|
||
+ }
|
||
+ if (prev == BB_HEAD (bb))
|
||
+ break;
|
||
+ prev = PREV_INSN (prev);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ edge e;
|
||
+ edge_iterator ei;
|
||
+ bool simple_loop = false;
|
||
+
|
||
+ FOR_EACH_EDGE (e, ei, bb->preds)
|
||
+ if (e->src == bb)
|
||
+ {
|
||
+ simple_loop = true;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (simple_loop)
|
||
+ {
|
||
+ rtx prev = BB_END (bb);
|
||
+ while (prev
|
||
+ && prev != insn
|
||
+ && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (INSN_P (prev))
|
||
+ {
|
||
+ distance++;
|
||
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
|
||
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||
+ && (regno1 == DF_REF_REGNO (*def_rec)
|
||
+ || regno2 == DF_REF_REGNO (*def_rec)))
|
||
+ {
|
||
+ insn_type = get_attr_type (prev);
|
||
+ if (insn_type != TYPE_LEA)
|
||
+ goto done;
|
||
+ }
|
||
+ }
|
||
+ prev = PREV_INSN (prev);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ distance = -1;
|
||
+
|
||
+done:
|
||
+ /* get_attr_type may modify recog data. We want to make sure
|
||
+ that recog data is valid for instruction INSN, on which
|
||
+ distance_non_agu_define is called. INSN is unchanged here. */
|
||
+ extract_insn_cached (insn);
|
||
+ return distance;
|
||
+}
|
||
+
|
||
+/* Return the distance between INSN and the next insn that uses
+   register number REGNO0 in memory address.  Return -1 if no such
+   use is found within LEA_SEARCH_THRESHOLD, or if REGNO0 is set.  */
|
||
+
|
||
+static int
|
||
+distance_agu_use (unsigned int regno0, rtx insn)
|
||
+{
|
||
+ basic_block bb = BLOCK_FOR_INSN (insn);
|
||
+ int distance = 0;
|
||
+ df_ref *def_rec;
|
||
+ df_ref *use_rec;
|
||
+
|
||
+ if (insn != BB_END (bb))
|
||
+ {
|
||
+ rtx next = NEXT_INSN (insn);
|
||
+ while (next && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (INSN_P (next))
|
||
+ {
|
||
+ distance++;
|
||
+
|
||
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
|
||
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
|
||
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
|
||
+ && regno0 == DF_REF_REGNO (*use_rec))
|
||
+ {
|
||
+	      /* Return DISTANCE if OP0 is used in a memory
+		 address in NEXT.  */
|
||
+ return distance;
|
||
+ }
|
||
+
|
||
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
|
||
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||
+ && regno0 == DF_REF_REGNO (*def_rec))
|
||
+ {
|
||
+ /* Return -1 if OP0 is set in NEXT. */
|
||
+ return -1;
|
||
+ }
|
||
+ }
|
||
+ if (next == BB_END (bb))
|
||
+ break;
|
||
+ next = NEXT_INSN (next);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ edge e;
|
||
+ edge_iterator ei;
|
||
+ bool simple_loop = false;
|
||
+
|
||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||
+ if (e->dest == bb)
|
||
+ {
|
||
+ simple_loop = true;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (simple_loop)
|
||
+ {
|
||
+ rtx next = BB_HEAD (bb);
|
||
+ while (next
|
||
+ && next != insn
|
||
+ && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (INSN_P (next))
|
||
+ {
|
||
+ distance++;
|
||
+
|
||
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
|
||
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
|
||
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
|
||
+ && regno0 == DF_REF_REGNO (*use_rec))
|
||
+ {
|
||
+	      /* Return DISTANCE if OP0 is used in a memory
+		 address in NEXT.  */
|
||
+ return distance;
|
||
+ }
|
||
+
|
||
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
|
||
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
||
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
||
+ && regno0 == DF_REF_REGNO (*def_rec))
|
||
+ {
|
||
+ /* Return -1 if OP0 is set in NEXT. */
|
||
+ return -1;
|
||
+ }
|
||
+
|
||
+ }
|
||
+ next = NEXT_INSN (next);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return -1;
|
||
+}
|
||
+
|
||
+/* Define this macro to tune LEA priority vs ADD; it takes effect when
+   there is a dilemma of choosing LEA or ADD.
+   Negative value: ADD is more preferred than LEA
+   Zero: Neutral
+   Positive value: LEA is more preferred than ADD */
+#define IX86_LEA_PRIORITY 2
+
+/* Return true if it is ok to optimize an ADD operation to LEA
+   operation to avoid flag register consumption.  For processors
+   like ATOM, if the destination register of LEA holds an actual
+   address which will be used soon, LEA is better; otherwise ADD
+   is better.  */
|
||
+
|
||
+bool
|
||
+ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
|
||
+ rtx insn, rtx operands[])
|
||
+{
|
||
+ unsigned int regno0 = true_regnum (operands[0]);
|
||
+ unsigned int regno1 = true_regnum (operands[1]);
|
||
+ unsigned int regno2;
|
||
+
|
||
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
|
||
+ return regno0 != regno1;
|
||
+
|
||
+ regno2 = true_regnum (operands[2]);
|
||
+
|
||
+ /* If a = b + c, (a!=b && a!=c), must use lea form. */
|
||
+ if (regno0 != regno1 && regno0 != regno2)
|
||
+ return true;
|
||
+ else
|
||
+ {
|
||
+ int dist_define, dist_use;
|
||
+ dist_define = distance_non_agu_define (regno1, regno2, insn);
|
||
+ if (dist_define <= 0)
|
||
+ return true;
|
||
+
|
||
+      /* If this insn has both backward non-agu dependence and forward
+         agu dependence, the one with the shorter distance takes effect.  */
|
||
+ dist_use = distance_agu_use (regno0, insn);
|
||
+ if (dist_use <= 0
|
||
+ || (dist_define + IX86_LEA_PRIORITY) < dist_use)
|
||
+ return false;
|
||
+
|
||
+ return true;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Return true if destination reg of SET_BODY is shift count of
|
||
+ USE_BODY. */
|
||
+
|
||
+static bool
|
||
+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
|
||
+{
|
||
+ rtx set_dest;
|
||
+ rtx shift_rtx;
|
||
+ int i;
|
||
+
|
||
+ /* Retrieve destination of SET_BODY. */
|
||
+ switch (GET_CODE (set_body))
|
||
+ {
|
||
+ case SET:
|
||
+ set_dest = SET_DEST (set_body);
|
||
+ if (!set_dest || !REG_P (set_dest))
|
||
+ return false;
|
||
+ break;
|
||
+ case PARALLEL:
|
||
+ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
|
||
+ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
|
||
+ use_body))
|
||
+ return true;
|
||
+ default:
|
||
+ return false;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ /* Retrieve shift count of USE_BODY. */
|
||
+ switch (GET_CODE (use_body))
|
||
+ {
|
||
+ case SET:
|
||
+ shift_rtx = XEXP (use_body, 1);
|
||
+ break;
|
||
+ case PARALLEL:
|
||
+ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
|
||
+ if (ix86_dep_by_shift_count_body (set_body,
|
||
+ XVECEXP (use_body, 0, i)))
|
||
+ return true;
|
||
+ default:
|
||
+ return false;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (shift_rtx
|
||
+ && (GET_CODE (shift_rtx) == ASHIFT
|
||
+ || GET_CODE (shift_rtx) == LSHIFTRT
|
||
+ || GET_CODE (shift_rtx) == ASHIFTRT
|
||
+ || GET_CODE (shift_rtx) == ROTATE
|
||
+ || GET_CODE (shift_rtx) == ROTATERT))
|
||
+ {
|
||
+ rtx shift_count = XEXP (shift_rtx, 1);
|
||
+
|
||
+ /* Return true if shift count is dest of SET_BODY. */
|
||
+ if (REG_P (shift_count)
|
||
+ && true_regnum (set_dest) == true_regnum (shift_count))
|
||
+ return true;
|
||
+ }
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
+/* Return true if destination reg of SET_INSN is shift count of
|
||
+ USE_INSN. */
|
||
+
|
||
+bool
|
||
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
|
||
+{
|
||
+ return ix86_dep_by_shift_count_body (PATTERN (set_insn),
|
||
+ PATTERN (use_insn));
|
||
+}
|
||
+
|
||
/* Return TRUE or FALSE depending on whether the unary operator meets the
|
||
appropriate constraints. */
|
||
|
||
@@ -18838,7 +19281,7 @@
|
||
f = GGC_CNEW (struct machine_function);
|
||
f->use_fast_prologue_epilogue_nregs = -1;
|
||
f->tls_descriptor_call_expanded_p = 0;
|
||
- f->call_abi = DEFAULT_ABI;
|
||
+ f->call_abi = ix86_abi;
|
||
|
||
return f;
|
||
}
|
||
@@ -19099,6 +19542,7 @@
|
||
switch (ix86_tune)
|
||
{
|
||
case PROCESSOR_PENTIUM:
|
||
+ case PROCESSOR_ATOM:
|
||
case PROCESSOR_K6:
|
||
return 2;
|
||
|
||
@@ -19165,41 +19609,21 @@
|
||
return 1;
|
||
}
|
||
|
||
-/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
|
||
- address with operands set by DEP_INSN. */
|
||
+/* Return true iff USE_INSN has a memory address with operands set by
|
||
+ SET_INSN. */
|
||
|
||
-static int
|
||
-ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
|
||
+bool
|
||
+ix86_agi_dependent (rtx set_insn, rtx use_insn)
|
||
{
|
||
- rtx addr;
|
||
-
|
||
- if (insn_type == TYPE_LEA
|
||
- && TARGET_PENTIUM)
|
||
- {
|
||
- addr = PATTERN (insn);
|
||
-
|
||
- if (GET_CODE (addr) == PARALLEL)
|
||
- addr = XVECEXP (addr, 0, 0);
|
||
-
|
||
- gcc_assert (GET_CODE (addr) == SET);
|
||
-
|
||
- addr = SET_SRC (addr);
|
||
- }
|
||
- else
|
||
- {
|
||
- int i;
|
||
- extract_insn_cached (insn);
|
||
- for (i = recog_data.n_operands - 1; i >= 0; --i)
|
||
- if (MEM_P (recog_data.operand[i]))
|
||
- {
|
||
- addr = XEXP (recog_data.operand[i], 0);
|
||
- goto found;
|
||
- }
|
||
- return 0;
|
||
- found:;
|
||
- }
|
||
-
|
||
- return modified_in_p (addr, dep_insn);
|
||
+ int i;
|
||
+ extract_insn_cached (use_insn);
|
||
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
|
||
+ if (MEM_P (recog_data.operand[i]))
|
||
+ {
|
||
+ rtx addr = XEXP (recog_data.operand[i], 0);
|
||
+ return modified_in_p (addr, set_insn) != 0;
|
||
+ }
|
||
+ return false;
|
||
}
|
||
|
||
static int
|
||
@@ -19227,7 +19651,20 @@
|
||
{
|
||
case PROCESSOR_PENTIUM:
|
||
/* Address Generation Interlock adds a cycle of latency. */
|
||
- if (ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ if (insn_type == TYPE_LEA)
|
||
+ {
|
||
+ rtx addr = PATTERN (insn);
|
||
+
|
||
+ if (GET_CODE (addr) == PARALLEL)
|
||
+ addr = XVECEXP (addr, 0, 0);
|
||
+
|
||
+ gcc_assert (GET_CODE (addr) == SET);
|
||
+
|
||
+ addr = SET_SRC (addr);
|
||
+ if (modified_in_p (addr, dep_insn))
|
||
+ cost += 1;
|
||
+ }
|
||
+ else if (ix86_agi_dependent (dep_insn, insn))
|
||
cost += 1;
|
||
|
||
/* ??? Compares pair with jump/setcc. */
|
||
@@ -19237,7 +19674,7 @@
|
||
/* Floating point stores require value to be ready one cycle earlier. */
|
||
if (insn_type == TYPE_FMOV
|
||
&& get_attr_memory (insn) == MEMORY_STORE
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
cost += 1;
|
||
break;
|
||
|
||
@@ -19260,7 +19697,7 @@
|
||
in parallel with previous instruction in case
|
||
previous instruction is not needed to compute the address. */
|
||
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
{
|
||
/* Claim moves to take one cycle, as core can issue one load
|
||
at time and the next load can start cycle later. */
|
||
@@ -19289,7 +19726,7 @@
|
||
in parallel with previous instruction in case
|
||
previous instruction is not needed to compute the address. */
|
||
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
{
|
||
/* Claim moves to take one cycle, as core can issue one load
|
||
at time and the next load can start cycle later. */
|
||
@@ -19306,6 +19743,7 @@
|
||
case PROCESSOR_ATHLON:
|
||
case PROCESSOR_K8:
|
||
case PROCESSOR_AMDFAM10:
|
||
+ case PROCESSOR_ATOM:
|
||
case PROCESSOR_GENERIC32:
|
||
case PROCESSOR_GENERIC64:
|
||
memory = get_attr_memory (insn);
|
||
@@ -19314,7 +19752,7 @@
|
||
in parallel with previous instruction in case
|
||
previous instruction is not needed to compute the address. */
|
||
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
{
|
||
enum attr_unit unit = get_attr_unit (insn);
|
||
int loadcost = 3;
|
||
@@ -29594,14 +30032,11 @@
|
||
tree
|
||
ix86_fn_abi_va_list (tree fndecl)
|
||
{
|
||
- int abi;
|
||
-
|
||
if (!TARGET_64BIT)
|
||
return va_list_type_node;
|
||
gcc_assert (fndecl != NULL_TREE);
|
||
- abi = ix86_function_abi ((const_tree) fndecl);
|
||
|
||
- if (abi == MS_ABI)
|
||
+ if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
|
||
return ms_va_list_type_node;
|
||
else
|
||
return sysv_va_list_type_node;
|
||
--- a/gcc/config/i386/i386-c.c
|
||
+++ b/gcc/config/i386/i386-c.c
|
||
@@ -119,6 +119,10 @@
|
||
def_or_undef (parse_in, "__core2");
|
||
def_or_undef (parse_in, "__core2__");
|
||
break;
|
||
+ case PROCESSOR_ATOM:
|
||
+ def_or_undef (parse_in, "__atom");
|
||
+ def_or_undef (parse_in, "__atom__");
|
||
+ break;
|
||
/* use PROCESSOR_max to not set/unset the arch macro. */
|
||
case PROCESSOR_max:
|
||
break;
|
||
@@ -187,6 +191,9 @@
|
||
case PROCESSOR_CORE2:
|
||
def_or_undef (parse_in, "__tune_core2__");
|
||
break;
|
||
+ case PROCESSOR_ATOM:
|
||
+ def_or_undef (parse_in, "__tune_atom__");
|
||
+ break;
|
||
case PROCESSOR_GENERIC32:
|
||
case PROCESSOR_GENERIC64:
|
||
break;
|
||
--- a/gcc/config/i386/i386.h
|
||
+++ b/gcc/config/i386/i386.h
|
||
@@ -59,6 +59,7 @@
|
||
#define TARGET_ABM OPTION_ISA_ABM
|
||
#define TARGET_POPCNT OPTION_ISA_POPCNT
|
||
#define TARGET_SAHF OPTION_ISA_SAHF
|
||
+#define TARGET_MOVBE OPTION_ISA_MOVBE
|
||
#define TARGET_AES OPTION_ISA_AES
|
||
#define TARGET_PCLMUL OPTION_ISA_PCLMUL
|
||
#define TARGET_CMPXCHG16B OPTION_ISA_CX16
|
||
@@ -236,6 +237,7 @@
|
||
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
|
||
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
|
||
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
|
||
+#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
|
||
|
||
/* Feature tests against the various tunings. */
|
||
enum ix86_tune_indices {
|
||
@@ -300,6 +302,7 @@
|
||
X86_TUNE_USE_VECTOR_FP_CONVERTS,
|
||
X86_TUNE_USE_VECTOR_CONVERTS,
|
||
X86_TUNE_FUSE_CMP_AND_BRANCH,
|
||
+ X86_TUNE_OPT_AGU,
|
||
|
||
X86_TUNE_LAST
|
||
};
|
||
@@ -387,6 +390,7 @@
|
||
ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
|
||
#define TARGET_FUSE_CMP_AND_BRANCH \
|
||
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
|
||
+#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
|
||
|
||
/* Feature tests against the various architecture variations. */
|
||
enum ix86_arch_indices {
|
||
@@ -470,7 +474,10 @@
|
||
MS_ABI = 1
|
||
};
|
||
|
||
-/* The default abi form used by target. */
|
||
+/* The abi used by target. */
|
||
+extern enum calling_abi ix86_abi;
|
||
+
|
||
+/* The default abi used by target. */
|
||
#define DEFAULT_ABI SYSV_ABI
|
||
|
||
/* Subtargets may reset this to 1 in order to enable 96-bit long double
|
||
@@ -569,6 +576,7 @@
|
||
TARGET_CPU_DEFAULT_prescott,
|
||
TARGET_CPU_DEFAULT_nocona,
|
||
TARGET_CPU_DEFAULT_core2,
|
||
+ TARGET_CPU_DEFAULT_atom,
|
||
|
||
TARGET_CPU_DEFAULT_geode,
|
||
TARGET_CPU_DEFAULT_k6,
|
||
@@ -658,7 +666,7 @@
|
||
|
||
/* Boundary (in *bits*) on which stack pointer should be aligned. */
|
||
#define STACK_BOUNDARY \
|
||
- (TARGET_64BIT && DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
|
||
+ (TARGET_64BIT && ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
|
||
|
||
/* Stack boundary of the main function guaranteed by OS. */
|
||
#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
|
||
@@ -1584,7 +1592,7 @@
|
||
int maybe_vaarg; /* true for calls to possibly vardic fncts. */
|
||
int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should
|
||
be passed in SSE registers. Otherwise 0. */
|
||
- int call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
|
||
+ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
|
||
MS_ABI for ms abi. */
|
||
} CUMULATIVE_ARGS;
|
||
|
||
@@ -2230,6 +2238,7 @@
|
||
PROCESSOR_GENERIC32,
|
||
PROCESSOR_GENERIC64,
|
||
PROCESSOR_AMDFAM10,
|
||
+ PROCESSOR_ATOM,
|
||
PROCESSOR_max
|
||
};
|
||
|
||
@@ -2403,7 +2412,7 @@
|
||
int tls_descriptor_call_expanded_p;
|
||
/* This value is used for amd64 targets and specifies the current abi
|
||
to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi. */
|
||
- int call_abi;
|
||
+ enum calling_abi call_abi;
|
||
};
|
||
|
||
#define ix86_stack_locals (cfun->machine->stack_locals)
|
||
--- a/gcc/config/i386/i386.md
|
||
+++ b/gcc/config/i386/i386.md
|
||
@@ -316,7 +316,7 @@
|
||
|
||
|
||
;; Processor type.
|
||
-(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
|
||
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
|
||
generic64,amdfam10"
|
||
(const (symbol_ref "ix86_schedule")))
|
||
|
||
@@ -612,6 +612,12 @@
|
||
(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
|
||
(const_string "any"))
|
||
|
||
+;; Define attribute to classify add/sub insns that consume the carry flag (CF)
|
||
+(define_attr "use_carry" "0,1" (const_string "0"))
|
||
+
|
||
+;; Define attribute to indicate unaligned ssemov insns
|
||
+(define_attr "movu" "0,1" (const_string "0"))
|
||
+
|
||
;; Describe a user's asm statement.
|
||
(define_asm_attributes
|
||
[(set_attr "length" "128")
|
||
@@ -727,6 +733,7 @@
|
||
(include "k6.md")
|
||
(include "athlon.md")
|
||
(include "geode.md")
|
||
+(include "atom.md")
|
||
|
||
|
||
;; Operand and operator predicates and constraints
|
||
@@ -5790,6 +5797,7 @@
|
||
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
|
||
"adc{q}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -5864,6 +5872,7 @@
|
||
"ix86_binary_operator_ok (PLUS, QImode, operands)"
|
||
"adc{b}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "QI")])
|
||
|
||
@@ -5876,6 +5885,7 @@
|
||
"ix86_binary_operator_ok (PLUS, HImode, operands)"
|
||
"adc{w}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "HI")])
|
||
|
||
@@ -5888,6 +5898,7 @@
|
||
"ix86_binary_operator_ok (PLUS, SImode, operands)"
|
||
"adc{l}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
@@ -5901,6 +5912,7 @@
|
||
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
|
||
"adc{l}\t{%2, %k0|%k0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
@@ -6130,9 +6142,9 @@
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "*adddi_1_rex64"
|
||
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
|
||
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
|
||
- (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
|
||
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
|
||
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r")
|
||
+ (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
|
||
{
|
||
@@ -6153,6 +6165,10 @@
|
||
}
|
||
|
||
default:
|
||
+ /* Use add as much as possible to replace lea for AGU optimization. */
|
||
+ if (which_alternative == 2 && TARGET_OPT_AGU)
|
||
+ return "add{q}\t{%1, %0|%0, %1}";
|
||
+
|
||
gcc_assert (rtx_equal_p (operands[0], operands[1]));
|
||
|
||
/* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
|
||
@@ -6171,8 +6187,11 @@
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "2")
|
||
+ (cond [(and (eq_attr "alternative" "2")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
(const_string "lea")
|
||
+ (eq_attr "alternative" "3")
|
||
+ (const_string "lea")
|
||
; Current assemblers are broken and do not allow @GOTOFF in
|
||
; ought but a memory context.
|
||
(match_operand:DI 2 "pic_symbolic_operand" "")
|
||
@@ -6189,8 +6208,8 @@
|
||
(plus:DI (match_operand:DI 1 "register_operand" "")
|
||
(match_operand:DI 2 "x86_64_nonmemory_operand" "")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
- "TARGET_64BIT && reload_completed
|
||
- && true_regnum (operands[0]) != true_regnum (operands[1])"
|
||
+ "TARGET_64BIT && reload_completed
|
||
+ && ix86_lea_for_add_ok (PLUS, insn, operands)"
|
||
[(set (match_dup 0)
|
||
(plus:DI (match_dup 1)
|
||
(match_dup 2)))]
|
||
@@ -6394,9 +6413,9 @@
|
||
|
||
|
||
(define_insn "*addsi_1"
|
||
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
|
||
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
|
||
- (match_operand:SI 2 "general_operand" "g,ri,li")))
|
||
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
|
||
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r")
|
||
+ (match_operand:SI 2 "general_operand" "g,ri,0,li")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
"ix86_binary_operator_ok (PLUS, SImode, operands)"
|
||
{
|
||
@@ -6417,6 +6436,10 @@
|
||
}
|
||
|
||
default:
|
||
+ /* Use add as much as possible to replace lea for AGU optimization. */
|
||
+ if (which_alternative == 2 && TARGET_OPT_AGU)
|
||
+ return "add{l}\t{%1, %0|%0, %1}";
|
||
+
|
||
gcc_assert (rtx_equal_p (operands[0], operands[1]));
|
||
|
||
/* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
|
||
@@ -6433,7 +6456,10 @@
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "2")
|
||
+ (cond [(and (eq_attr "alternative" "2")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
+ (const_string "lea")
|
||
+ (eq_attr "alternative" "3")
|
||
(const_string "lea")
|
||
; Current assemblers are broken and do not allow @GOTOFF in
|
||
; ought but a memory context.
|
||
@@ -6451,8 +6477,7 @@
|
||
(plus (match_operand 1 "register_operand" "")
|
||
(match_operand 2 "nonmemory_operand" "")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
- "reload_completed
|
||
- && true_regnum (operands[0]) != true_regnum (operands[1])"
|
||
+ "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)"
|
||
[(const_int 0)]
|
||
{
|
||
rtx pat;
|
||
@@ -7553,6 +7578,7 @@
|
||
"TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
|
||
"sbb{q}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -7601,6 +7627,7 @@
|
||
"ix86_binary_operator_ok (MINUS, QImode, operands)"
|
||
"sbb{b}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "QI")])
|
||
|
||
@@ -7613,6 +7640,7 @@
|
||
"ix86_binary_operator_ok (MINUS, HImode, operands)"
|
||
"sbb{w}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "HI")])
|
||
|
||
@@ -7625,6 +7653,7 @@
|
||
"ix86_binary_operator_ok (MINUS, SImode, operands)"
|
||
"sbb{l}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
@@ -15155,7 +15184,7 @@
|
||
? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
|
||
operands[0], const0_rtx,
|
||
GEN_INT ((TARGET_64BIT
|
||
- ? (DEFAULT_ABI == SYSV_ABI
|
||
+ ? (ix86_abi == SYSV_ABI
|
||
? X86_64_SSE_REGPARM_MAX
|
||
: X64_SSE_REGPARM_MAX)
|
||
: X86_32_SSE_REGPARM_MAX)
|
||
@@ -15235,6 +15264,7 @@
|
||
"reload_completed"
|
||
"ret"
|
||
[(set_attr "length" "1")
|
||
+ (set_attr "atom_unit" "jeu")
|
||
(set_attr "length_immediate" "0")
|
||
(set_attr "modrm" "0")])
|
||
|
||
@@ -15247,6 +15277,7 @@
|
||
"reload_completed"
|
||
"rep\;ret"
|
||
[(set_attr "length" "1")
|
||
+ (set_attr "atom_unit" "jeu")
|
||
(set_attr "length_immediate" "0")
|
||
(set_attr "prefix_rep" "1")
|
||
(set_attr "modrm" "0")])
|
||
@@ -15257,6 +15288,7 @@
|
||
"reload_completed"
|
||
"ret\t%0"
|
||
[(set_attr "length" "3")
|
||
+ (set_attr "atom_unit" "jeu")
|
||
(set_attr "length_immediate" "2")
|
||
(set_attr "modrm" "0")])
|
||
|
||
@@ -15610,7 +15642,7 @@
|
||
(bswap:SI (match_operand:SI 1 "register_operand" "")))]
|
||
""
|
||
{
|
||
- if (!TARGET_BSWAP)
|
||
+ if (!(TARGET_BSWAP || TARGET_MOVBE))
|
||
{
|
||
rtx x = operands[0];
|
||
|
||
@@ -15622,6 +15654,21 @@
|
||
}
|
||
})
|
||
|
||
+(define_insn "*bswapsi_movbe"
|
||
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,m")
|
||
+ (bswap:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,r")))]
|
||
+ "TARGET_MOVBE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
+ "@
|
||
+ bswap\t%0
|
||
+ movbe\t{%1, %0|%0, %1}
|
||
+ movbe\t{%1, %0|%0, %1}"
|
||
+ [(set_attr "type" "*,imov,imov")
|
||
+ (set_attr "modrm" "*,1,1")
|
||
+ (set_attr "prefix_0f" "1")
|
||
+ (set_attr "prefix_extra" "*,1,1")
|
||
+ (set_attr "length" "2,*,*")
|
||
+ (set_attr "mode" "SI")])
|
||
+
|
||
(define_insn "*bswapsi_1"
|
||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||
(bswap:SI (match_operand:SI 1 "register_operand" "0")))]
|
||
@@ -15650,7 +15697,29 @@
|
||
[(set_attr "length" "4")
|
||
(set_attr "mode" "HI")])
|
||
|
||
-(define_insn "bswapdi2"
|
||
+(define_expand "bswapdi2"
|
||
+ [(set (match_operand:DI 0 "register_operand" "")
|
||
+ (bswap:DI (match_operand:DI 1 "register_operand" "")))]
|
||
+ "TARGET_64BIT"
|
||
+ "")
|
||
+
|
||
+(define_insn "*bswapdi_movbe"
|
||
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m")
|
||
+ (bswap:DI (match_operand:DI 1 "nonimmediate_operand" "0,m,r")))]
|
||
+ "TARGET_64BIT && TARGET_MOVBE
|
||
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
+ "@
|
||
+ bswap\t%0
|
||
+ movbe\t{%1, %0|%0, %1}
|
||
+ movbe\t{%1, %0|%0, %1}"
|
||
+ [(set_attr "type" "*,imov,imov")
|
||
+ (set_attr "modrm" "*,1,1")
|
||
+ (set_attr "prefix_0f" "1")
|
||
+ (set_attr "prefix_extra" "*,1,1")
|
||
+ (set_attr "length" "3,*,*")
|
||
+ (set_attr "mode" "DI")])
|
||
+
|
||
+(define_insn "*bswapdi_1"
|
||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||
(bswap:DI (match_operand:DI 1 "register_operand" "0")))]
|
||
"TARGET_64BIT"
|
||
@@ -16378,6 +16447,7 @@
|
||
"TARGET_SSE_MATH"
|
||
"%vrcpss\t{%1, %d0|%d0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "SF")])
|
||
|
||
@@ -16729,6 +16799,7 @@
|
||
"TARGET_SSE_MATH"
|
||
"%vrsqrtss\t{%1, %d0|%d0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "SF")])
|
||
|
||
@@ -16749,6 +16820,7 @@
|
||
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
|
||
"%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "sqrt")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "<MODE>")
|
||
(set_attr "athlon_decode" "*")
|
||
@@ -19802,6 +19874,7 @@
|
||
; Since we don't have the proper number of operands for an alu insn,
|
||
; fill in all the blanks.
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -19817,6 +19890,7 @@
|
||
""
|
||
"sbb{q}\t%0, %0"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -19860,6 +19934,7 @@
|
||
; Since we don't have the proper number of operands for an alu insn,
|
||
; fill in all the blanks.
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -19875,6 +19950,7 @@
|
||
""
|
||
"sbb{l}\t%0, %0"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -20207,7 +20283,8 @@
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "0")
|
||
+ (cond [(and (eq_attr "alternative" "0")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
(const_string "alu")
|
||
(match_operand:SI 2 "const0_operand" "")
|
||
(const_string "imov")
|
||
@@ -20250,7 +20327,8 @@
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "0")
|
||
+ (cond [(and (eq_attr "alternative" "0")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
(const_string "alu")
|
||
(match_operand:DI 2 "const0_operand" "")
|
||
(const_string "imov")
|
||
@@ -21734,6 +21812,7 @@
|
||
return patterns[locality];
|
||
}
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "prefetch")
|
||
(set_attr "memory" "none")])
|
||
|
||
(define_insn "*prefetch_sse_rex"
|
||
@@ -21752,6 +21831,7 @@
|
||
return patterns[locality];
|
||
}
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "prefetch")
|
||
(set_attr "memory" "none")])
|
||
|
||
(define_insn "*prefetch_3dnow"
|
||
--- a/gcc/config/i386/i386.opt
|
||
+++ b/gcc/config/i386/i386.opt
|
||
@@ -228,6 +228,10 @@
|
||
Target RejectNegative Joined Var(ix86_tune_string)
|
||
Schedule code for given CPU
|
||
|
||
+mabi=
|
||
+Target RejectNegative Joined Var(ix86_abi_string)
|
||
+Generate code that conforms to the given ABI
|
||
+
|
||
mveclibabi=
|
||
Target RejectNegative Joined Var(ix86_veclibabi_string)
|
||
Vector library ABI to use
|
||
@@ -335,6 +339,10 @@
|
||
Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) VarExists Save
|
||
Support code generation of sahf instruction in 64bit x86-64 code.
|
||
|
||
+mmovbe
|
||
+Target Report Mask(ISA_MOVBE) Var(ix86_isa_flags) VarExists Save
|
||
+Support code generation of movbe instruction.
|
||
+
|
||
maes
|
||
Target Report Mask(ISA_AES) Var(ix86_isa_flags) VarExists Save
|
||
Support AES built-in functions and code generation
|
||
--- a/gcc/config/i386/i386-protos.h
|
||
+++ b/gcc/config/i386/i386-protos.h
|
||
@@ -86,6 +86,9 @@
|
||
extern void ix86_expand_binary_operator (enum rtx_code,
|
||
enum machine_mode, rtx[]);
|
||
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
|
||
+extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
|
||
+extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
|
||
+extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
|
||
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
|
||
rtx[]);
|
||
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
|
||
@@ -140,9 +143,8 @@
|
||
extern bool ix86_sol10_return_in_memory (const_tree,const_tree);
|
||
extern rtx ix86_force_to_memory (enum machine_mode, rtx);
|
||
extern void ix86_free_from_memory (enum machine_mode);
|
||
-extern int ix86_cfun_abi (void);
|
||
-extern int ix86_function_abi (const_tree);
|
||
-extern int ix86_function_type_abi (const_tree);
|
||
+extern enum calling_abi ix86_cfun_abi (void);
|
||
+extern enum calling_abi ix86_function_type_abi (const_tree);
|
||
extern void ix86_call_abi_override (const_tree);
|
||
extern tree ix86_fn_abi_va_list (tree);
|
||
extern tree ix86_canonical_va_list_type (tree);
|
||
--- a/gcc/config/i386/mingw32.h
|
||
+++ b/gcc/config/i386/mingw32.h
|
||
@@ -38,7 +38,7 @@
|
||
builtin_define_std ("WINNT"); \
|
||
builtin_define_with_int_value ("_INTEGRAL_MAX_BITS", \
|
||
TYPE_PRECISION (intmax_type_node));\
|
||
- if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) \
|
||
+ if (TARGET_64BIT && ix86_abi == MS_ABI) \
|
||
{ \
|
||
builtin_define ("__MINGW64__"); \
|
||
builtin_define_std ("WIN64"); \
|
||
--- a/gcc/config/i386/sse.md
|
||
+++ b/gcc/config/i386/sse.md
|
||
@@ -338,6 +338,7 @@
|
||
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix" "vex")
|
||
(set_attr "mode" "<MODE>")])
|
||
|
||
@@ -363,6 +364,7 @@
|
||
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "mode" "<MODE>")])
|
||
|
||
(define_insn "avx_movdqu<avxmodesuffix>"
|
||
@@ -373,6 +375,7 @@
|
||
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"vmovdqu\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix" "vex")
|
||
(set_attr "mode" "<avxvecmode>")])
|
||
|
||
@@ -383,6 +386,7 @@
|
||
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"movdqu\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -424,7 +428,7 @@
|
||
UNSPEC_MOVNT))]
|
||
"TARGET_SSE2"
|
||
"movntdq\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -434,7 +438,7 @@
|
||
UNSPEC_MOVNT))]
|
||
"TARGET_SSE2"
|
||
"movnti\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "mode" "V2DF")])
|
||
|
||
(define_insn "avx_lddqu<avxmodesuffix>"
|
||
@@ -445,6 +449,7 @@
|
||
"TARGET_AVX"
|
||
"vlddqu\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssecvt")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix" "vex")
|
||
(set_attr "mode" "<avxvecmode>")])
|
||
|
||
@@ -454,7 +459,8 @@
|
||
UNSPEC_LDDQU))]
|
||
"TARGET_SSE3"
|
||
"lddqu\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix_rep" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -761,6 +767,7 @@
|
||
"TARGET_SSE"
|
||
"%vrcpps\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "V4SF")])
|
||
|
||
@@ -787,6 +794,7 @@
|
||
"TARGET_SSE"
|
||
"rcpss\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "mode" "SF")])
|
||
|
||
(define_expand "sqrtv8sf2"
|
||
@@ -832,6 +840,7 @@
|
||
"TARGET_SSE"
|
||
"%vsqrtps\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "sqrt")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "V4SF")])
|
||
|
||
@@ -876,6 +885,7 @@
|
||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||
"sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "sqrt")
|
||
(set_attr "mode" "<ssescalarmode>")])
|
||
|
||
(define_expand "rsqrtv8sf2"
|
||
@@ -1039,7 +1049,7 @@
|
||
(const_int 1)))]
|
||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||
"<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
|
||
- [(set_attr "type" "sse")
|
||
+ [(set_attr "type" "sseadd")
|
||
(set_attr "mode" "<ssescalarmode>")])
|
||
|
||
;; These versions of the min/max patterns implement exactly the operations
|
||
@@ -1175,6 +1185,7 @@
|
||
"TARGET_SSE3"
|
||
"addsubpd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "mode" "V2DF")])
|
||
|
||
(define_insn "avx_h<plusminus_insn>v4df3"
|
||
@@ -1298,6 +1309,7 @@
|
||
"TARGET_SSE3"
|
||
"h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_rep" "1")
|
||
(set_attr "mode" "V4SF")])
|
||
|
||
@@ -5066,6 +5078,7 @@
|
||
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
|
||
"pmaddwd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -7025,6 +7038,7 @@
|
||
movq\t{%H1, %0|%0, %H1}
|
||
mov{q}\t{%H1, %0|%0, %H1}"
|
||
[(set_attr "type" "ssemov,sseishft,ssemov,imov")
|
||
+ (set_attr "atom_unit" "*,sishuf,*,*")
|
||
(set_attr "memory" "*,none,*,*")
|
||
(set_attr "mode" "V2SF,TI,TI,DI")])
|
||
|
||
@@ -7057,6 +7071,7 @@
|
||
psrldq\t{$8, %0|%0, 8}
|
||
movq\t{%H1, %0|%0, %H1}"
|
||
[(set_attr "type" "ssemov,sseishft,ssemov")
|
||
+ (set_attr "atom_unit" "*,sishuf,*")
|
||
(set_attr "memory" "*,none,*")
|
||
(set_attr "mode" "V2SF,TI,TI")])
|
||
|
||
@@ -7614,6 +7629,7 @@
|
||
"TARGET_SSE2"
|
||
"psadbw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -7635,7 +7651,7 @@
|
||
UNSPEC_MOVMSK))]
|
||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||
"%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "<MODE>")])
|
||
|
||
@@ -7645,7 +7661,7 @@
|
||
UNSPEC_MOVMSK))]
|
||
"TARGET_SSE2"
|
||
"%vpmovmskb\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "SI")])
|
||
@@ -7668,7 +7684,7 @@
|
||
"TARGET_SSE2 && !TARGET_64BIT"
|
||
;; @@@ check ordering of operands in intel/nonintel syntax
|
||
"%vmaskmovdqu\t{%2, %1|%1, %2}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7682,7 +7698,7 @@
|
||
"TARGET_SSE2 && TARGET_64BIT"
|
||
;; @@@ check ordering of operands in intel/nonintel syntax
|
||
"%vmaskmovdqu\t{%2, %1|%1, %2}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7693,6 +7709,7 @@
|
||
"TARGET_SSE"
|
||
"%vldmxcsr\t%0"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "mxcsr")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "memory" "load")])
|
||
|
||
@@ -7702,6 +7719,7 @@
|
||
"TARGET_SSE"
|
||
"%vstmxcsr\t%0"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "mxcsr")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "memory" "store")])
|
||
|
||
@@ -7720,6 +7738,7 @@
|
||
"TARGET_SSE || TARGET_3DNOW_A"
|
||
"sfence"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "fence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_insn "sse2_clflush"
|
||
@@ -7728,6 +7747,7 @@
|
||
"TARGET_SSE2"
|
||
"clflush\t%a0"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "fence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_expand "sse2_mfence"
|
||
@@ -7745,6 +7765,7 @@
|
||
"TARGET_64BIT || TARGET_SSE2"
|
||
"mfence"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "fence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_expand "sse2_lfence"
|
||
@@ -7762,6 +7783,7 @@
|
||
"TARGET_SSE2"
|
||
"lfence"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "lfence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_insn "sse3_mwait"
|
||
@@ -7885,6 +7907,7 @@
|
||
"TARGET_SSSE3"
|
||
"phaddw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7913,6 +7936,7 @@
|
||
"TARGET_SSSE3"
|
||
"phaddw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -7967,6 +7991,7 @@
|
||
"TARGET_SSSE3"
|
||
"phaddd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7987,6 +8012,7 @@
|
||
"TARGET_SSSE3"
|
||
"phaddd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8073,6 +8099,7 @@
|
||
"TARGET_SSSE3"
|
||
"phaddsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8101,6 +8128,7 @@
|
||
"TARGET_SSSE3"
|
||
"phaddsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8187,6 +8215,7 @@
|
||
"TARGET_SSSE3"
|
||
"phsubw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8215,6 +8244,7 @@
|
||
"TARGET_SSSE3"
|
||
"phsubw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8269,6 +8299,7 @@
|
||
"TARGET_SSSE3"
|
||
"phsubd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8289,6 +8320,7 @@
|
||
"TARGET_SSSE3"
|
||
"phsubd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8375,6 +8407,7 @@
|
||
"TARGET_SSSE3"
|
||
"phsubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8403,6 +8436,7 @@
|
||
"TARGET_SSSE3"
|
||
"phsubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8509,6 +8543,7 @@
|
||
"TARGET_SSSE3"
|
||
"pmaddubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8547,6 +8582,7 @@
|
||
"TARGET_SSSE3"
|
||
"pmaddubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8754,6 +8790,7 @@
|
||
return "palignr\t{%3, %2, %0|%0, %2, %3}";
|
||
}
|
||
[(set_attr "type" "sseishft")
|
||
+ (set_attr "atom_unit" "sishuf")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8770,6 +8807,7 @@
|
||
return "palignr\t{%3, %2, %0|%0, %2, %3}";
|
||
}
|
||
[(set_attr "type" "sseishft")
|
||
+ (set_attr "atom_unit" "sishuf")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8956,7 +8994,7 @@
|
||
UNSPEC_MOVNTDQA))]
|
||
"TARGET_SSE4_1"
|
||
"%vmovntdqa\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "TI")])
|
||
--- a/gcc/config/i386/winnt.c
|
||
+++ b/gcc/config/i386/winnt.c
|
||
@@ -499,8 +499,11 @@
|
||
{
|
||
HOST_WIDE_INT rounded;
|
||
|
||
- /* Compute as in assemble_noswitch_variable, since we don't actually
|
||
- support aligned common. */
|
||
+ /* Compute as in assemble_noswitch_variable, since we don't have
|
||
+ support for aligned common on older binutils. We must also
|
||
+ avoid emitting a common symbol of size zero, as this is the
|
||
+ overloaded representation that indicates an undefined external
|
||
+ symbol in the PE object file format. */
|
||
rounded = size ? size : 1;
|
||
rounded += (BIGGEST_ALIGNMENT / BITS_PER_UNIT) - 1;
|
||
rounded = (rounded / (BIGGEST_ALIGNMENT / BITS_PER_UNIT)
|
||
@@ -510,9 +513,13 @@
|
||
|
||
fprintf (stream, "\t.comm\t");
|
||
assemble_name (stream, name);
|
||
- fprintf (stream, ", " HOST_WIDE_INT_PRINT_DEC "\t" ASM_COMMENT_START
|
||
- " " HOST_WIDE_INT_PRINT_DEC "\n",
|
||
- rounded, size);
|
||
+ if (use_pe_aligned_common)
|
||
+ fprintf (stream, ", " HOST_WIDE_INT_PRINT_DEC ", %d\n",
|
||
+ size ? size : (HOST_WIDE_INT) 1,
|
||
+ exact_log2 (align) - exact_log2 (CHAR_BIT));
|
||
+ else
|
||
+ fprintf (stream, ", " HOST_WIDE_INT_PRINT_DEC "\t" ASM_COMMENT_START
|
||
+ " " HOST_WIDE_INT_PRINT_DEC "\n", rounded, size);
|
||
}
|
||
|
||
/* The Microsoft linker requires that every function be marked as
|
||
--- a/gcc/config/m68k/constraints.md
|
||
+++ b/gcc/config/m68k/constraints.md
|
||
@@ -124,6 +124,11 @@
|
||
(and (match_code "const_int")
|
||
(match_test "ival < -0x8000 || ival > 0x7FFF")))
|
||
|
||
+(define_constraint "Cu"
|
||
+ "16-bit offset for wrapped symbols"
|
||
+ (and (match_code "const")
|
||
+ (match_test "m68k_unwrap_symbol (op, false) != op")))
|
||
+
|
||
(define_constraint "CQ"
|
||
"Integers valid for mvq."
|
||
(and (match_code "const_int")
|
||
--- a/gcc/config/m68k/lb1sf68.asm
|
||
+++ b/gcc/config/m68k/lb1sf68.asm
|
||
@@ -163,6 +163,8 @@
|
||
#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
|
||
lea \addr-.-8,a0
|
||
jsr pc@(a0)
|
||
+#elif defined (__mcfisab__) || defined (__mcfisac__)
|
||
+ bsr.l \addr
|
||
#else
|
||
bsr \addr
|
||
#endif
|
||
@@ -202,6 +204,8 @@
|
||
#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
|
||
lea \addr-.-8,a0
|
||
jsr pc@(a0)
|
||
+#elif defined (__mcfisab__) || defined (__mcfisac__)
|
||
+ bsr.l \addr
|
||
#else
|
||
bsr \addr
|
||
#endif
|
||
--- a/gcc/config/m68k/linux-unwind.h
|
||
+++ b/gcc/config/m68k/linux-unwind.h
|
||
@@ -77,9 +77,15 @@
|
||
fs->regs.reg[9].how = REG_SAVED_OFFSET;
|
||
fs->regs.reg[9].loc.offset = (long) &sc->sc_a1 - cfa;
|
||
|
||
+#ifdef __uClinux__
|
||
+ fs->regs.reg[13].how = REG_SAVED_OFFSET;
|
||
+ fs->regs.reg[13].loc.offset = (long) &sc->sc_a5 - cfa;
|
||
+#endif
|
||
+
|
||
fs->regs.reg[24].how = REG_SAVED_OFFSET;
|
||
fs->regs.reg[24].loc.offset = (long) &sc->sc_pc - cfa;
|
||
|
||
+#if defined __mcffpu__ && !defined __uClinux__
|
||
if (*(int *) sc->sc_fpstate)
|
||
{
|
||
int *fpregs = (int *) sc->sc_fpregs;
|
||
@@ -89,11 +95,19 @@
|
||
fs->regs.reg[17].how = REG_SAVED_OFFSET;
|
||
fs->regs.reg[17].loc.offset = (long) &fpregs[M68K_FP_SIZE/4] - cfa;
|
||
}
|
||
+#elif defined __mcffpu__
|
||
+# error Implement this when uClinux kernel is ported to an FPU architecture
|
||
+#endif
|
||
}
|
||
#ifdef __mcoldfire__
|
||
/* move.l #__NR_rt_sigreturn,%d0; trap #0 */
|
||
- else if (pc[0] == 0x203c && pc[1] == 0x0000 &&
|
||
- pc[2] == 0x00ad && pc[3] == 0x4e40)
|
||
+ else if ((pc[0] == 0x203c && pc[1] == 0x0000 &&
|
||
+ pc[2] == 0x00ad && pc[3] == 0x4e40) ||
|
||
+ /* Don't ask me why, this is just what some kernels do:
|
||
+ moveq #-__NR_rt_sigreturn,%d0; andil 0xff,%d0; trap #0;
|
||
+ Sigh... */
|
||
+ (pc[0] == 0x70ad && pc[1] == 0x0280 && pc[2] == 0x0000 &&
|
||
+ pc[3] == 0x00ff && pc[4] == 0x4e40 && pc[5] == 0x0000))
|
||
#else
|
||
/* moveq #~__NR_rt_sigreturn,%d0; not.b %d0; trap #0 */
|
||
else if (pc[0] == 0x7052 && pc[1] == 0x4600 && pc[2] == 0x4e40)
|
||
--- a/gcc/config/m68k/m68k.c
|
||
+++ b/gcc/config/m68k/m68k.c
|
||
@@ -46,6 +46,7 @@
|
||
/* ??? Need to add a dependency between m68k.o and sched-int.h. */
|
||
#include "sched-int.h"
|
||
#include "insn-codes.h"
|
||
+#include "ggc.h"
|
||
|
||
enum reg_class regno_reg_class[] =
|
||
{
|
||
@@ -146,10 +147,12 @@
|
||
static void m68k_compute_frame_layout (void);
|
||
static bool m68k_save_reg (unsigned int regno, bool interrupt_handler);
|
||
static bool m68k_ok_for_sibcall_p (tree, tree);
|
||
+static bool m68k_tls_symbol_p (rtx);
|
||
static bool m68k_rtx_costs (rtx, int, int, int *, bool);
|
||
#if M68K_HONOR_TARGET_STRICT_ALIGNMENT
|
||
static bool m68k_return_in_memory (const_tree, const_tree);
|
||
#endif
|
||
+static void m68k_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
|
||
|
||
|
||
/* Specify the identification number of the library being built */
|
||
@@ -252,6 +255,14 @@
|
||
#define TARGET_RETURN_IN_MEMORY m68k_return_in_memory
|
||
#endif
|
||
|
||
+#ifdef HAVE_AS_TLS
|
||
+#undef TARGET_HAVE_TLS
|
||
+#define TARGET_HAVE_TLS (true)
|
||
+
|
||
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
|
||
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL m68k_output_dwarf_dtprel
|
||
+#endif
|
||
+
|
||
static const struct attribute_spec m68k_attribute_table[] =
|
||
{
|
||
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
||
@@ -1150,8 +1161,7 @@
|
||
current_frame.reg_mask, true, true));
|
||
}
|
||
|
||
- if (flag_pic
|
||
- && !TARGET_SEP_DATA
|
||
+ if (!TARGET_SEP_DATA
|
||
&& crtl->uses_pic_offset_table)
|
||
insn = emit_insn (gen_load_got (pic_offset_table_rtx));
|
||
}
|
||
@@ -1425,6 +1435,86 @@
|
||
return replace_equiv_address (x, gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM));
|
||
}
|
||
|
||
+/* Convert X to a legitimate address and return it if successful. Otherwise
|
||
+ return X.
|
||
+
|
||
+ For the 68000, we handle X+REG by loading X into a register R and
|
||
+ using R+REG. R will go in an address reg and indexing will be used.
|
||
+ However, if REG is a broken-out memory address or multiplication,
|
||
+ nothing needs to be done because REG can certainly go in an address reg. */
|
||
+
|
||
+rtx
|
||
+m68k_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
|
||
+{
|
||
+ if (m68k_tls_symbol_p (x))
|
||
+ return m68k_legitimize_tls_address (x);
|
||
+
|
||
+ if (GET_CODE (x) == PLUS)
|
||
+ {
|
||
+ int ch = (x) != (oldx);
|
||
+ int copied = 0;
|
||
+
|
||
+#define COPY_ONCE(Y) if (!copied) { Y = copy_rtx (Y); copied = ch = 1; }
|
||
+
|
||
+ if (GET_CODE (XEXP (x, 0)) == MULT)
|
||
+ {
|
||
+ COPY_ONCE (x);
|
||
+ XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
|
||
+ }
|
||
+ if (GET_CODE (XEXP (x, 1)) == MULT)
|
||
+ {
|
||
+ COPY_ONCE (x);
|
||
+ XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
|
||
+ }
|
||
+ if (ch)
|
||
+ {
|
||
+ if (GET_CODE (XEXP (x, 1)) == REG
|
||
+ && GET_CODE (XEXP (x, 0)) == REG)
|
||
+ {
|
||
+ if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
+ {
|
||
+ COPY_ONCE (x);
|
||
+ x = force_operand (x, 0);
|
||
+ }
|
||
+ return x;
|
||
+ }
|
||
+ if (memory_address_p (mode, x))
|
||
+ return x;
|
||
+ }
|
||
+ if (GET_CODE (XEXP (x, 0)) == REG
|
||
+ || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
|
||
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
|
||
+ && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode))
|
||
+ {
|
||
+ rtx temp = gen_reg_rtx (Pmode);
|
||
+ rtx val = force_operand (XEXP (x, 1), 0);
|
||
+ emit_move_insn (temp, val);
|
||
+ COPY_ONCE (x);
|
||
+ XEXP (x, 1) = temp;
|
||
+ if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
+ && GET_CODE (XEXP (x, 0)) == REG)
|
||
+ x = force_operand (x, 0);
|
||
+ }
|
||
+ else if (GET_CODE (XEXP (x, 1)) == REG
|
||
+ || (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
|
||
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
|
||
+ && GET_MODE (XEXP (XEXP (x, 1), 0)) == HImode))
|
||
+ {
|
||
+ rtx temp = gen_reg_rtx (Pmode);
|
||
+ rtx val = force_operand (XEXP (x, 0), 0);
|
||
+ emit_move_insn (temp, val);
|
||
+ COPY_ONCE (x);
|
||
+ XEXP (x, 0) = temp;
|
||
+ if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
+ && GET_CODE (XEXP (x, 1)) == REG)
|
||
+ x = force_operand (x, 0);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return x;
|
||
+}
|
||
+
|
||
+
|
||
/* Output a dbCC; jCC sequence.  Note we do not handle the
   floating point version of this sequence (Fdbcc).  We also
   do not handle alternative conditions when CC_NO_OVERFLOW is
@@ -1713,15 +1803,16 @@
   whether we need strict checking.  */

bool
-m68k_legitimate_index_reg_p (rtx x, bool strict_p)
+m68k_legitimate_index_reg_p (enum machine_mode mode, rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
          && (strict_p
-              ? REGNO_OK_FOR_INDEX_P (REGNO (x))
-              : REGNO_OK_FOR_INDEX_NONSTRICT_P (REGNO (x))));
+              ? REGNO_MODE_OK_FOR_INDEX_P (REGNO (x), mode)
+              : (MODE_OK_FOR_INDEX_P (mode)
+                 && REGNO_OK_FOR_INDEX_NONSTRICT_P (REGNO (x)))));
}

/* Return true if X is a legitimate index expression for a (d8,An,Xn) or
@@ -1729,7 +1820,8 @@
   ADDRESS if so.  STRICT_P says whether we need strict checking.  */

static bool
-m68k_decompose_index (rtx x, bool strict_p, struct m68k_address *address)
+m68k_decompose_index (enum machine_mode mode, rtx x, bool strict_p,
+                      struct m68k_address *address)
{
  int scale;

@@ -1753,7 +1845,7 @@
      && GET_MODE (XEXP (x, 0)) == HImode)
    x = XEXP (x, 0);

-  if (m68k_legitimate_index_reg_p (x, strict_p))
+  if (m68k_legitimate_index_reg_p (mode, x, strict_p))
    {
      address->scale = scale;
      address->index = x;
@@ -1777,7 +1869,7 @@
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
-  return false;
+  return m68k_tls_reference_p (x, false);
}

/* Return true if X is a legitimate constant address that can reach
@@ -1805,7 +1897,7 @@
      return false;
    }

-  return true;
+  return !m68k_tls_reference_p (x, false);
}

/* Return true if X is a LABEL_REF for a jump table.  Assume that unplaced
@@ -1872,15 +1964,17 @@
  /* Check for GOT loads.  These are (bd,An,Xn) addresses if
     TARGET_68020 && flag_pic == 2, otherwise they are (d16,An)
     addresses.  */
-  if (flag_pic
-      && GET_CODE (x) == PLUS
-      && XEXP (x, 0) == pic_offset_table_rtx
-      && (GET_CODE (XEXP (x, 1)) == SYMBOL_REF
-          || GET_CODE (XEXP (x, 1)) == LABEL_REF))
+  if (GET_CODE (x) == PLUS
+      && XEXP (x, 0) == pic_offset_table_rtx)
    {
-      address->base = XEXP (x, 0);
-      address->offset = XEXP (x, 1);
-      return true;
+      /* As we are processing a PLUS, do not unwrap RELOC32 symbols --
+         they are invalid in this context.  */
+      if (m68k_unwrap_symbol (XEXP (x, 1), false) != XEXP (x, 1))
+        {
+          address->base = XEXP (x, 0);
+          address->offset = XEXP (x, 1);
+          return true;
+        }
    }

  /* The ColdFire FPU only accepts addressing modes 2-5.  */
@@ -1905,7 +1999,7 @@
     accesses to unplaced labels in other cases.  */
  if (GET_CODE (x) == PLUS
      && m68k_jump_table_ref_p (XEXP (x, 1))
-      && m68k_decompose_index (XEXP (x, 0), strict_p, address))
+      && m68k_decompose_index (mode, XEXP (x, 0), strict_p, address))
    {
      address->offset = XEXP (x, 1);
      return true;
@@ -1937,7 +2031,7 @@
     worse code.  */
  if (address->offset
      && symbolic_operand (address->offset, VOIDmode)
-      && m68k_decompose_index (x, strict_p, address))
+      && m68k_decompose_index (mode, x, strict_p, address))
    return true;
    }
  else
@@ -1956,14 +2050,14 @@
  if (GET_CODE (x) == PLUS)
    {
      if (m68k_legitimate_base_reg_p (XEXP (x, 0), strict_p)
-          && m68k_decompose_index (XEXP (x, 1), strict_p, address))
+          && m68k_decompose_index (mode, XEXP (x, 1), strict_p, address))
        {
          address->base = XEXP (x, 0);
          return true;
        }

      if (m68k_legitimate_base_reg_p (XEXP (x, 1), strict_p)
-          && m68k_decompose_index (XEXP (x, 0), strict_p, address))
+          && m68k_decompose_index (mode, XEXP (x, 0), strict_p, address))
        {
          address->base = XEXP (x, 1);
          return true;
@@ -2025,6 +2119,243 @@
      && !address.index);
}

+/* Return GOT pointer. */
|
||
+
|
||
+static rtx
|
||
+m68k_get_gp (void)
|
||
+{
|
||
+ if (pic_offset_table_rtx == NULL_RTX)
|
||
+ pic_offset_table_rtx = gen_rtx_REG (Pmode, PIC_REG);
|
||
+
|
||
+ crtl->uses_pic_offset_table = 1;
|
||
+
|
||
+ return pic_offset_table_rtx;
|
||
+}
|
||
+
|
||
+/* M68K relocations, used to distinguish GOT and TLS relocations in UNSPEC
|
||
+ wrappers. */
|
||
+enum m68k_reloc { RELOC_GOT, RELOC_TLSGD, RELOC_TLSLDM, RELOC_TLSLDO,
|
||
+ RELOC_TLSIE, RELOC_TLSLE };
|
||
+
|
||
+#define TLS_RELOC_P(RELOC) ((RELOC) != RELOC_GOT)
|
||
+
|
||
+/* Wrap symbol X into unspec representing relocation RELOC.
|
||
+ BASE_REG - register that should be added to the result.
|
||
+ TEMP_REG - if non-null, temporary register. */
|
||
+
|
||
+static rtx
|
||
+m68k_wrap_symbol (rtx x, enum m68k_reloc reloc, rtx base_reg, rtx temp_reg)
|
||
+{
|
||
+ bool use_x_p;
|
||
+
|
||
+ use_x_p = (base_reg == pic_offset_table_rtx) ? TARGET_XGOT : TARGET_XTLS;
|
||
+
|
||
+ if (TARGET_COLDFIRE && use_x_p)
|
||
+ /* When compiling with -mx{got, tls} switch the code will look like this:
|
||
+
|
||
+ move.l <X>@<RELOC>,<TEMP_REG>
|
||
+ add.l <BASE_REG>,<TEMP_REG> */
|
||
+ {
|
||
+ /* Wrap X in UNSPEC_??? to tip m68k_output_addr_const_extra
|
||
+ to put @RELOC after reference. */
|
||
+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (reloc)),
|
||
+ UNSPEC_RELOC32);
|
||
+ x = gen_rtx_CONST (Pmode, x);
|
||
+
|
||
+ if (temp_reg == NULL)
|
||
+ {
|
||
+ gcc_assert (can_create_pseudo_p ());
|
||
+ temp_reg = gen_reg_rtx (Pmode);
|
||
+ }
|
||
+
|
||
+ emit_move_insn (temp_reg, x);
|
||
+ emit_insn (gen_addsi3 (temp_reg, temp_reg, base_reg));
|
||
+ x = temp_reg;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (reloc)),
|
||
+ UNSPEC_RELOC16);
|
||
+ x = gen_rtx_CONST (Pmode, x);
|
||
+
|
||
+ x = gen_rtx_PLUS (Pmode, base_reg, x);
|
||
+ }
|
||
+
|
||
+ return x;
|
||
+}
|
||
+
|
||
+/* Helper for m68k_unwrap_symbol.
|
||
+ Also, if unwrapping was successful (that is if (ORIG != <return value>)),
|
||
+ sets *RELOC_PTR to relocation type for the symbol. */
|
||
+
|
||
+static rtx
|
||
+m68k_unwrap_symbol_1 (rtx orig, bool unwrap_reloc32_p,
|
||
+ enum m68k_reloc *reloc_ptr)
|
||
+{
|
||
+ if (GET_CODE (orig) == CONST)
|
||
+ {
|
||
+ rtx x;
|
||
+ enum m68k_reloc dummy;
|
||
+
|
||
+ x = XEXP (orig, 0);
|
||
+
|
||
+ if (reloc_ptr == NULL)
|
||
+ reloc_ptr = &dummy;
|
||
+
|
||
+ /* Handle an addend. */
|
||
+ if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS)
|
||
+ && CONST_INT_P (XEXP (x, 1)))
|
||
+ x = XEXP (x, 0);
|
||
+
|
||
+ if (GET_CODE (x) == UNSPEC)
|
||
+ {
|
||
+ switch (XINT (x, 1))
|
||
+ {
|
||
+ case UNSPEC_RELOC16:
|
||
+ orig = XVECEXP (x, 0, 0);
|
||
+ *reloc_ptr = (enum m68k_reloc) INTVAL (XVECEXP (x, 0, 1));
|
||
+ break;
|
||
+
|
||
+ case UNSPEC_RELOC32:
|
||
+ if (unwrap_reloc32_p)
|
||
+ {
|
||
+ orig = XVECEXP (x, 0, 0);
|
||
+ *reloc_ptr = (enum m68k_reloc) INTVAL (XVECEXP (x, 0, 1));
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return orig;
|
||
+}
|
||
+
|
||
+/* Unwrap symbol from UNSPEC_RELOC16 and, if unwrap_reloc32_p,
|
||
+ UNSPEC_RELOC32 wrappers. */
|
||
+
|
||
+rtx
|
||
+m68k_unwrap_symbol (rtx orig, bool unwrap_reloc32_p)
|
||
+{
|
||
+ return m68k_unwrap_symbol_1 (orig, unwrap_reloc32_p, NULL);
|
||
+}
|
||
+
|
||
+/* Helper for m68k_final_prescan_insn. */
|
||
+
|
||
+static int
|
||
+m68k_final_prescan_insn_1 (rtx *x_ptr, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ rtx x = *x_ptr;
|
||
+
|
||
+ if (m68k_unwrap_symbol (x, true) != x)
|
||
+ /* For rationale of the below, see comment in m68k_final_prescan_insn. */
|
||
+ {
|
||
+ rtx plus;
|
||
+
|
||
+ gcc_assert (GET_CODE (x) == CONST);
|
||
+ plus = XEXP (x, 0);
|
||
+
|
||
+ if (GET_CODE (plus) == PLUS || GET_CODE (plus) == MINUS)
|
||
+ {
|
||
+ rtx unspec;
|
||
+ rtx addend;
|
||
+
|
||
+ unspec = XEXP (plus, 0);
|
||
+ gcc_assert (GET_CODE (unspec) == UNSPEC);
|
||
+ addend = XEXP (plus, 1);
|
||
+ gcc_assert (CONST_INT_P (addend));
|
||
+
|
||
+ /* We now have all the pieces, rearrange them. */
|
||
+
|
||
+ /* Move symbol to plus. */
|
||
+ XEXP (plus, 0) = XVECEXP (unspec, 0, 0);
|
||
+
|
||
+ /* Move plus inside unspec. */
|
||
+ XVECEXP (unspec, 0, 0) = plus;
|
||
+
|
||
+ /* Move unspec to top level of const. */
|
||
+ XEXP (x, 0) = unspec;
|
||
+ }
|
||
+
|
||
+ return -1;
|
||
+ }
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+/* Prescan insn before outputting assembler for it.  */
|
||
+
|
||
+void
|
||
+m68k_final_prescan_insn (rtx insn ATTRIBUTE_UNUSED,
|
||
+ rtx *operands, int n_operands)
|
||
+{
|
||
+ int i;
|
||
+
|
||
+ /* Combine and, possibly, other optimizations may do good job
|
||
+ converting
|
||
+ (const (unspec [(symbol)]))
|
||
+ into
|
||
+ (const (plus (unspec [(symbol)])
|
||
+ (const_int N))).
|
||
+ The problem with this is emitting @TLS or @GOT decorations.
|
||
+ The decoration is emitted when processing (unspec), so the
|
||
+ result would be "#symbol@TLSLE+N" instead of "#symbol+N@TLSLE".
|
||
+
|
||
+ It seems that the easiest solution to this is to convert such
|
||
+ operands to
|
||
+ (const (unspec [(plus (symbol)
|
||
+ (const_int N))])).
|
||
+ Note, that the top level of operand remains intact, so we don't have
|
||
+ to patch up anything outside of the operand. */
|
||
+
|
||
+ for (i = 0; i < n_operands; ++i)
|
||
+ {
|
||
+ rtx op;
|
||
+
|
||
+ op = operands[i];
|
||
+
|
||
+ for_each_rtx (&op, m68k_final_prescan_insn_1, NULL);
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Move X to a register and add REG_EQUAL note pointing to ORIG.
|
||
+ If REG is non-null, use it; generate new pseudo otherwise. */
|
||
+
|
||
+static rtx
|
||
+m68k_move_to_reg (rtx x, rtx orig, rtx reg)
|
||
+{
|
||
+ rtx insn;
|
||
+
|
||
+ if (reg == NULL_RTX)
|
||
+ {
|
||
+ gcc_assert (can_create_pseudo_p ());
|
||
+ reg = gen_reg_rtx (Pmode);
|
||
+ }
|
||
+
|
||
+ insn = emit_move_insn (reg, x);
|
||
+ /* Put a REG_EQUAL note on this insn, so that it can be optimized
|
||
+ by loop. */
|
||
+ set_unique_reg_note (insn, REG_EQUAL, orig);
|
||
+
|
||
+ return reg;
|
||
+}
|
||
+
|
||
+/* Does the same as m68k_wrap_symbol, but returns a memory reference to
|
||
+ GOT slot. */
|
||
+
|
||
+static rtx
|
||
+m68k_wrap_symbol_into_got_ref (rtx x, enum m68k_reloc reloc, rtx temp_reg)
|
||
+{
|
||
+ x = m68k_wrap_symbol (x, reloc, m68k_get_gp (), temp_reg);
|
||
+
|
||
+ x = gen_rtx_MEM (Pmode, x);
|
||
+ MEM_READONLY_P (x) = 1;
|
||
+
|
||
+ return x;
|
||
+}
|
||
+
|
||
/* Legitimize PIC addresses. If the address is already
|
||
position-independent, we return ORIG. Newly generated
|
||
position-independent addresses go to REG. If we need more
|
||
@@ -2076,42 +2407,15 @@
|
||
{
|
||
gcc_assert (reg);
|
||
|
||
- if (TARGET_COLDFIRE && TARGET_XGOT)
|
||
- /* When compiling with -mxgot switch the code for the above
|
||
- example will look like this:
|
||
-
|
||
- movel a5, a0
|
||
- addl _foo@GOT, a0
|
||
- movel a0@, a0
|
||
- movel #12345, a0@ */
|
||
- {
|
||
- rtx pic_offset;
|
||
-
|
||
- /* Wrap ORIG in UNSPEC_GOTOFF to tip m68k_output_addr_const_extra
|
||
- to put @GOT after reference. */
|
||
- pic_offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),
|
||
- UNSPEC_GOTOFF);
|
||
- pic_offset = gen_rtx_CONST (Pmode, pic_offset);
|
||
- emit_move_insn (reg, pic_offset);
|
||
- emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx));
|
||
- pic_ref = gen_rtx_MEM (Pmode, reg);
|
||
- }
|
||
- else
|
||
- pic_ref = gen_rtx_MEM (Pmode,
|
||
- gen_rtx_PLUS (Pmode,
|
||
- pic_offset_table_rtx, orig));
|
||
- crtl->uses_pic_offset_table = 1;
|
||
- MEM_READONLY_P (pic_ref) = 1;
|
||
- emit_move_insn (reg, pic_ref);
|
||
- return reg;
|
||
+ pic_ref = m68k_wrap_symbol_into_got_ref (orig, RELOC_GOT, reg);
|
||
+ pic_ref = m68k_move_to_reg (pic_ref, orig, reg);
|
||
}
|
||
else if (GET_CODE (orig) == CONST)
|
||
{
|
||
rtx base;
|
||
|
||
/* Make sure this has not already been legitimized. */
|
||
- if (GET_CODE (XEXP (orig, 0)) == PLUS
|
||
- && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
|
||
+ if (m68k_unwrap_symbol (orig, true) != orig)
|
||
return orig;
|
||
|
||
gcc_assert (reg);
|
||
@@ -2124,13 +2428,257 @@
|
||
base == reg ? 0 : reg);
|
||
|
||
if (GET_CODE (orig) == CONST_INT)
|
||
- return plus_constant (base, INTVAL (orig));
|
||
- pic_ref = gen_rtx_PLUS (Pmode, base, orig);
|
||
- /* Likewise, should we set special REG_NOTEs here? */
|
||
+ pic_ref = plus_constant (base, INTVAL (orig));
|
||
+ else
|
||
+ pic_ref = gen_rtx_PLUS (Pmode, base, orig);
|
||
}
|
||
+
|
||
return pic_ref;
|
||
}
|
||
|
||
+/* The __tls_get_addr symbol.  */
+static GTY(()) rtx m68k_tls_get_addr;
+
+/* Return SYMBOL_REF for __tls_get_addr.  */
+
+static rtx
+m68k_get_tls_get_addr (void)
+{
+  if (m68k_tls_get_addr == NULL_RTX)
+    m68k_tls_get_addr = init_one_libfunc ("__tls_get_addr");
+
+  return m68k_tls_get_addr;
+}
+
+/* Return libcall result in A0 instead of the usual D0.  */
+static bool m68k_libcall_value_in_a0_p = false;
+
+/* Emit an instruction sequence that calls __tls_get_addr.  X is
+   the TLS symbol we are referencing and RELOC is the symbol type to use
+   (either TLSGD or TLSLDM).  EQV is the REG_EQUAL note for the sequence
+   emitted.  A pseudo register with the result of the __tls_get_addr call is
+   returned.  */
+
+static rtx
+m68k_call_tls_get_addr (rtx x, rtx eqv, enum m68k_reloc reloc)
+{
+  rtx a0;
+  rtx insns;
+  rtx dest;
+
+  /* Emit the call sequence.  */
+  start_sequence ();
+
+  /* FIXME: Unfortunately, emit_library_call_value does not
+     consider (plus (%a5) (const (unspec))) to be a good enough
+     operand for push, so it forces it into a register.  The bad
+     thing about this is that the combiner, due to copy propagation and other
+     optimizations, sometimes cannot later fix this.  As a consequence,
+     an additional register may be allocated, resulting in a spill.
+     For reference, see the argument processing loops in
+     calls.c:emit_library_call_value_1.
+     For testcases, see gcc.target/m68k/tls-{gd, ld}.c  */
+  x = m68k_wrap_symbol (x, reloc, m68k_get_gp (), NULL_RTX);
+
+  /* __tls_get_addr() is not a libcall, but emitting a libcall_value
+     is the simplest way of generating a call.  The difference between
+     __tls_get_addr() and a libcall is that the result is returned in D0
+     instead of A0.  To work around this, we use m68k_libcall_value_in_a0_p,
+     which temporarily switches returning the result to A0.  */
+
+  m68k_libcall_value_in_a0_p = true;
+  a0 = emit_library_call_value (m68k_get_tls_get_addr (), NULL_RTX, LCT_PURE,
+                                Pmode, 1, x, Pmode);
+  m68k_libcall_value_in_a0_p = false;
+
+  insns = get_insns ();
+  end_sequence ();
+
+  gcc_assert (can_create_pseudo_p ());
+  dest = gen_reg_rtx (Pmode);
+  emit_libcall_block (insns, dest, a0, eqv);
+
+  return dest;
+}
+
|
||
+/* The __m68k_read_tp symbol.  */
|
||
+static GTY(()) rtx m68k_read_tp;
|
||
+
|
||
+/* Return SYMBOL_REF for __m68k_read_tp. */
|
||
+
|
||
+static rtx
|
||
+m68k_get_m68k_read_tp (void)
|
||
+{
|
||
+ if (m68k_read_tp == NULL_RTX)
|
||
+ m68k_read_tp = init_one_libfunc ("__m68k_read_tp");
|
||
+
|
||
+ return m68k_read_tp;
|
||
+}
|
||
+
|
||
+/* Emit instruction sequence that calls __m68k_read_tp.
|
||
+ A pseudo register with result of __m68k_read_tp call is returned. */
|
||
+
|
||
+static rtx
|
||
+m68k_call_m68k_read_tp (void)
|
||
+{
|
||
+ rtx a0;
|
||
+ rtx eqv;
|
||
+ rtx insns;
|
||
+ rtx dest;
|
||
+
|
||
+ start_sequence ();
|
||
+
|
||
+ /* __m68k_read_tp() is not a libcall, but emitting a libcall_value
|
||
+ is the simpliest way of generating a call. The difference between
|
||
+ __m68k_read_tp() and libcall is that the result is returned in D0
|
||
+ instead of A0. To workaround this, we use m68k_libcall_value_in_a0_p
|
||
+ which temporarily switches returning the result to A0. */
|
||
+
|
||
+ /* Emit the call sequence. */
|
||
+ m68k_libcall_value_in_a0_p = true;
|
||
+ a0 = emit_library_call_value (m68k_get_m68k_read_tp (), NULL_RTX, LCT_PURE,
|
||
+ Pmode, 0);
|
||
+ m68k_libcall_value_in_a0_p = false;
|
||
+ insns = get_insns ();
|
||
+ end_sequence ();
|
||
+
|
||
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
|
||
+ share the m68k_read_tp result with other IE/LE model accesses. */
|
||
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), UNSPEC_RELOC32);
|
||
+
|
||
+ gcc_assert (can_create_pseudo_p ());
|
||
+ dest = gen_reg_rtx (Pmode);
|
||
+ emit_libcall_block (insns, dest, a0, eqv);
|
||
+
|
||
+ return dest;
|
||
+}
|
||
+
|
||
+/* Return a legitimized address for accessing TLS SYMBOL_REF X.
|
||
+ For explanations on instructions sequences see TLS/NPTL ABI for m68k and
|
||
+ ColdFire. */
|
||
+
|
||
+rtx
|
||
+m68k_legitimize_tls_address (rtx orig)
|
||
+{
|
||
+ switch (SYMBOL_REF_TLS_MODEL (orig))
|
||
+ {
|
||
+ case TLS_MODEL_GLOBAL_DYNAMIC:
|
||
+ orig = m68k_call_tls_get_addr (orig, orig, RELOC_TLSGD);
|
||
+ break;
|
||
+
|
||
+ case TLS_MODEL_LOCAL_DYNAMIC:
|
||
+ {
|
||
+ rtx eqv;
|
||
+ rtx a0;
|
||
+ rtx x;
|
||
+
|
||
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
|
||
+ share the LDM result with other LD model accesses. */
|
||
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
|
||
+ UNSPEC_RELOC32);
|
||
+
|
||
+ a0 = m68k_call_tls_get_addr (orig, eqv, RELOC_TLSLDM);
|
||
+
|
||
+ x = m68k_wrap_symbol (orig, RELOC_TLSLDO, a0, NULL_RTX);
|
||
+
|
||
+ if (can_create_pseudo_p ())
|
||
+ x = m68k_move_to_reg (x, orig, NULL_RTX);
|
||
+
|
||
+ orig = x;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ case TLS_MODEL_INITIAL_EXEC:
|
||
+ {
|
||
+ rtx a0;
|
||
+ rtx x;
|
||
+
|
||
+ a0 = m68k_call_m68k_read_tp ();
|
||
+
|
||
+ x = m68k_wrap_symbol_into_got_ref (orig, RELOC_TLSIE, NULL_RTX);
|
||
+ x = gen_rtx_PLUS (Pmode, x, a0);
|
||
+
|
||
+ if (can_create_pseudo_p ())
|
||
+ x = m68k_move_to_reg (x, orig, NULL_RTX);
|
||
+
|
||
+ orig = x;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ case TLS_MODEL_LOCAL_EXEC:
|
||
+ {
|
||
+ rtx a0;
|
||
+ rtx x;
|
||
+
|
||
+ a0 = m68k_call_m68k_read_tp ();
|
||
+
|
||
+ x = m68k_wrap_symbol (orig, RELOC_TLSLE, a0, NULL_RTX);
|
||
+
|
||
+ if (can_create_pseudo_p ())
|
||
+ x = m68k_move_to_reg (x, orig, NULL_RTX);
|
||
+
|
||
+ orig = x;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ default:
|
||
+ gcc_unreachable ();
|
||
+ }
|
||
+
|
||
+ return orig;
|
||
+}
|
||
+
|
||
+/* Return true if X is a TLS symbol. */
|
||
+
|
||
+static bool
|
||
+m68k_tls_symbol_p (rtx x)
|
||
+{
|
||
+ if (!TARGET_HAVE_TLS)
|
||
+ return false;
|
||
+
|
||
+ if (GET_CODE (x) != SYMBOL_REF)
|
||
+ return false;
|
||
+
|
||
+ return SYMBOL_REF_TLS_MODEL (x) != 0;
|
||
+}
|
||
+
|
||
+/* Helper for m68k_tls_referenced_p. */
|
||
+
|
||
+static int
|
||
+m68k_tls_reference_p_1 (rtx *x_ptr, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ /* Note: this is not the same as m68k_tls_symbol_p. */
|
||
+ if (GET_CODE (*x_ptr) == SYMBOL_REF)
|
||
+ return SYMBOL_REF_TLS_MODEL (*x_ptr) != 0 ? 1 : 0;
|
||
+
|
||
+ /* Don't recurse into legitimate TLS references. */
|
||
+ if (m68k_tls_reference_p (*x_ptr, true))
|
||
+ return -1;
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+/* If !LEGITIMATE_P, return true if X is a TLS symbol reference,
|
||
+ though illegitimate one.
|
||
+ If LEGITIMATE_P, return true if X is a legitimate TLS symbol reference. */
|
||
+
|
||
+bool
|
||
+m68k_tls_reference_p (rtx x, bool legitimate_p)
|
||
+{
|
||
+ if (!TARGET_HAVE_TLS)
|
||
+ return false;
|
||
+
|
||
+ if (!legitimate_p)
|
||
+ return for_each_rtx (&x, m68k_tls_reference_p_1, NULL) == 1 ? true : false;
|
||
+ else
|
||
+ {
|
||
+ enum m68k_reloc reloc = RELOC_GOT;
|
||
+
|
||
+ return (m68k_unwrap_symbol_1 (x, true, &reloc) != x
|
||
+ && TLS_RELOC_P (reloc));
|
||
+ }
|
||
+}
|
||
+
|
||
|
||
|
||
#define USE_MOVQ(i) ((unsigned) ((i) + 128) <= 255)
|
||
@@ -3918,18 +4466,92 @@
|
||
}
|
||
}
|
||
|
||
+/* Return string for TLS relocation RELOC. */
|
||
+
|
||
+static const char *
|
||
+m68k_get_reloc_decoration (enum m68k_reloc reloc)
|
||
+{
|
||
+ /* To my knowledge, !MOTOROLA assemblers don't support TLS. */
|
||
+ gcc_assert (MOTOROLA || reloc == RELOC_GOT);
|
||
+
|
||
+ switch (reloc)
|
||
+ {
|
||
+ case RELOC_GOT:
|
||
+ if (MOTOROLA)
|
||
+ {
|
||
+ if (flag_pic == 1 && TARGET_68020)
|
||
+ return "@GOT.w";
|
||
+ else
|
||
+ return "@GOT";
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (TARGET_68020)
|
||
+ {
|
||
+ switch (flag_pic)
|
||
+ {
|
||
+ case 1:
|
||
+ return ":w";
|
||
+ case 2:
|
||
+ return ":l";
|
||
+ default:
|
||
+ return "";
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ case RELOC_TLSGD:
|
||
+ return "@TLSGD";
|
||
+
|
||
+ case RELOC_TLSLDM:
|
||
+ return "@TLSLDM";
|
||
+
|
||
+ case RELOC_TLSLDO:
|
||
+ return "@TLSLDO";
|
||
+
|
||
+ case RELOC_TLSIE:
|
||
+ return "@TLSIE";
|
||
+
|
||
+ case RELOC_TLSLE:
|
||
+ return "@TLSLE";
|
||
+
|
||
+ default:
|
||
+ gcc_unreachable ();
|
||
+ }
|
||
+}
|
||
+
|
||
/* m68k implementation of OUTPUT_ADDR_CONST_EXTRA. */
|
||
|
||
bool
|
||
m68k_output_addr_const_extra (FILE *file, rtx x)
|
||
{
|
||
- if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_GOTOFF)
|
||
- return false;
|
||
+ if (GET_CODE (x) == UNSPEC)
|
||
+ {
|
||
+ switch (XINT (x, 1))
|
||
+ {
|
||
+ case UNSPEC_RELOC16:
|
||
+ case UNSPEC_RELOC32:
|
||
+ output_addr_const (file, XVECEXP (x, 0, 0));
|
||
+ fputs (m68k_get_reloc_decoration (INTVAL (XVECEXP (x, 0, 1))), file);
|
||
+ return true;
|
||
|
||
- output_addr_const (file, XVECEXP (x, 0, 0));
|
||
- /* ??? What is the non-MOTOROLA syntax? */
|
||
- fputs ("@GOT", file);
|
||
- return true;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
+/* M68K implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
|
||
+
|
||
+static void
|
||
+m68k_output_dwarf_dtprel (FILE *file, int size, rtx x)
|
||
+{
|
||
+ gcc_assert (size == 4);
|
||
+ fputs ("\t.long\t", file);
|
||
+ output_addr_const (file, x);
|
||
+ fputs ("@TLSLDO+0x8000", file);
|
||
}
|
||
|
||
|
||
@@ -4019,15 +4641,8 @@
|
||
else
|
||
{
|
||
if (address.offset)
|
||
- {
|
||
- output_addr_const (file, address.offset);
|
||
- if (flag_pic && address.base == pic_offset_table_rtx)
|
||
- {
|
||
- fprintf (file, "@GOT");
|
||
- if (flag_pic == 1 && TARGET_68020)
|
||
- fprintf (file, ".w");
|
||
- }
|
||
- }
|
||
+ output_addr_const (file, address.offset);
|
||
+
|
||
putc ('(', file);
|
||
if (address.base)
|
||
fputs (M68K_REGNAME (REGNO (address.base)), file);
|
||
@@ -4060,19 +4675,7 @@
|
||
fputs (M68K_REGNAME (REGNO (address.base)), file);
|
||
fprintf (file, "@(");
|
||
if (address.offset)
|
||
- {
|
||
- output_addr_const (file, address.offset);
|
||
- if (address.base == pic_offset_table_rtx && TARGET_68020)
|
||
- switch (flag_pic)
|
||
- {
|
||
- case 1:
|
||
- fprintf (file, ":w"); break;
|
||
- case 2:
|
||
- fprintf (file, ":l"); break;
|
||
- default:
|
||
- break;
|
||
- }
|
||
- }
|
||
+ output_addr_const (file, address.offset);
|
||
}
|
||
/* Print the ",index" component, if any. */
|
||
if (address.index)
|
||
@@ -4580,7 +5183,8 @@
|
||
default:
|
||
break;
|
||
}
|
||
- return gen_rtx_REG (mode, D0_REG);
|
||
+
|
||
+ return gen_rtx_REG (mode, m68k_libcall_value_in_a0_p ? A0_REG : D0_REG);
|
||
}
|
||
|
||
rtx
|
||
@@ -4846,9 +5450,8 @@
|
||
return OP_TYPE_IMM_L;
|
||
|
||
default:
|
||
- if (GET_CODE (op) == SYMBOL_REF)
|
||
- /* ??? Just a guess. Probably we can guess better using length
|
||
- attribute of the instructions. */
|
||
+ if (symbolic_operand (m68k_unwrap_symbol (op, false), VOIDmode))
|
||
+ /* Just a guess. */
|
||
return OP_TYPE_IMM_W;
|
||
|
||
return OP_TYPE_IMM_L;
|
||
@@ -5793,3 +6396,5 @@
|
||
return 0;
|
||
}
|
||
}
|
||
+
|
||
+#include "gt-m68k.h"
|
||
--- a/gcc/config/m68k/m68k-devices.def
|
||
+++ b/gcc/config/m68k/m68k-devices.def
|
||
@@ -72,8 +72,8 @@
|
||
/* 680x0 series processors. */
|
||
M68K_DEVICE ("68000", m68000, "68000", "68000", 68000, isa_00, 0)
|
||
M68K_DEVICE ("68010", m68010, "68010", "68000", 68010, isa_10, 0)
|
||
-M68K_DEVICE ("68020", m68020, "68020", "68020", 68020, isa_20, FL_MMU)
|
||
-M68K_DEVICE ("68030", m68030, "68030", "68020", 68030, isa_20, FL_MMU)
|
||
+M68K_DEVICE ("68020", m68020, "68020", "68020", 68020, isa_20, FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("68030", m68030, "68030", "68020", 68030, isa_20, FL_MMU | FL_UCLINUX)
|
||
M68K_DEVICE ("68040", m68040, "68040", "68040", 68040, isa_40, FL_MMU)
|
||
M68K_DEVICE ("68060", m68060, "68060", "68060", 68060, isa_40, FL_MMU)
|
||
M68K_DEVICE ("68302", m68302, "68302", "68000", 68000, isa_00, FL_MMU)
|
||
@@ -81,7 +81,13 @@
|
||
M68K_DEVICE ("cpu32", cpu32, "cpu32", "cpu32", cpu32, isa_cpu32, FL_MMU)
|
||
|
||
/* ColdFire CFV1 processor. */
|
||
-M68K_DEVICE ("51qe", mcf51qe, "51qe", "51qe", cfv1, isa_c, FL_CF_USP)
|
||
+/* For historical reasons, the 51 multilib is named 51qe. */
|
||
+M68K_DEVICE ("51", mcf51, "51", "51qe", cfv1, isa_c, FL_CF_USP)
|
||
+M68K_DEVICE ("51ac", mcf51ac, "51", "51qe", cfv1, isa_c, FL_CF_USP)
|
||
+M68K_DEVICE ("51cn", mcf51cn, "51", "51qe", cfv1, isa_c, FL_CF_USP)
|
||
+M68K_DEVICE ("51em", mcf51em, "51", "51qe", cfv1, isa_c, FL_CF_USP | FL_CF_MAC)
|
||
+M68K_DEVICE ("51jm", mcf51jm, "51", "51qe", cfv1, isa_c, FL_CF_USP)
|
||
+M68K_DEVICE ("51qe", mcf51qe, "51", "51qe", cfv1, isa_c, FL_CF_USP)
|
||
|
||
/* ColdFire CFV2 processors. */
|
||
M68K_DEVICE ("5202", mcf5202, "5206", "5206", cfv2, isa_a, 0)
|
||
@@ -97,6 +103,7 @@
|
||
M68K_DEVICE ("5213", mcf5213, "5213", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
|
||
M68K_DEVICE ("5214", mcf5214, "5216", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
M68K_DEVICE ("5216", mcf5216, "5216", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("5221x", mcf5221x, "5221x", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
|
||
M68K_DEVICE ("52221", mcf52221, "52223", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
|
||
M68K_DEVICE ("52223", mcf52223, "52223", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
|
||
M68K_DEVICE ("52230", mcf52230, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
@@ -107,6 +114,14 @@
|
||
M68K_DEVICE ("52235", mcf52235, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
M68K_DEVICE ("5224", mcf5224, "5225", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
|
||
M68K_DEVICE ("5225", mcf5225, "5225", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
|
||
+M68K_DEVICE ("52252", mcf52252, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("52254", mcf52254, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("52255", mcf52255, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("52256", mcf52256, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("52258", mcf52258, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("52259", mcf52259, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("52274", mcf52274, "52277", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("52277", mcf52277, "52277", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
M68K_DEVICE ("5232", mcf5232, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
M68K_DEVICE ("5233", mcf5233, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
M68K_DEVICE ("5234", mcf5234, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
@@ -126,6 +141,13 @@
|
||
M68K_DEVICE ("528x", mcf528x, "5282", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
|
||
/* CFV3 processors. */
|
||
+M68K_DEVICE ("53011", mcf53011, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("53012", mcf53012, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("53013", mcf53013, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("53014", mcf53014, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("53015", mcf53015, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("53016", mcf53016, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
+M68K_DEVICE ("53017", mcf53017, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
M68K_DEVICE ("5307", mcf5307, "5307", "5307", cfv3, isa_a, FL_CF_HWDIV | FL_CF_MAC)
|
||
M68K_DEVICE ("5327", mcf5327, "5329", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
M68K_DEVICE ("5328", mcf5328, "5329", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
|
||
@@ -137,12 +159,17 @@
|
||
|
||
/* CFV4/CFV4e processors. */
|
||
M68K_DEVICE ("5407", mcf5407, "5407", "5407", cfv4, isa_b, FL_CF_MAC)
|
||
-M68K_DEVICE ("54450", mcf54450, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU)
|
||
-M68K_DEVICE ("54451", mcf54451, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU)
|
||
-M68K_DEVICE ("54452", mcf54452, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU)
|
||
-M68K_DEVICE ("54453", mcf54453, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU)
|
||
-M68K_DEVICE ("54454", mcf54454, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU)
|
||
-M68K_DEVICE ("54455", mcf54455, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU)
|
||
+M68K_DEVICE ("54410", mcf54410, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54415", mcf54415, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54416", mcf54416, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54417", mcf54417, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54418", mcf54418, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54450", mcf54450, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54451", mcf54451, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54452", mcf54452, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54453", mcf54453, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54454", mcf54454, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
+M68K_DEVICE ("54455", mcf54455, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
|
||
M68K_DEVICE ("5470", mcf5470, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
|
||
M68K_DEVICE ("5471", mcf5471, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
|
||
M68K_DEVICE ("5472", mcf5472, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
|
||
--- a/gcc/config/m68k/m68k.h
|
||
+++ b/gcc/config/m68k/m68k.h
|
||
@@ -232,6 +232,7 @@
|
||
#define FL_ISA_C (1 << 16)
|
||
#define FL_FIDOA (1 << 17)
|
||
#define FL_MMU 0 /* Used by multilib machinery. */
|
||
+#define FL_UCLINUX 0 /* Used by multilib machinery. */
|
||
|
||
#define TARGET_68010 ((m68k_cpu_flags & FL_ISA_68010) != 0)
|
||
#define TARGET_68020 ((m68k_cpu_flags & FL_ISA_68020) != 0)
|
||
@@ -501,7 +502,8 @@
|
||
|
||
extern enum reg_class regno_reg_class[];
|
||
#define REGNO_REG_CLASS(REGNO) (regno_reg_class[(REGNO)])
|
||
-#define INDEX_REG_CLASS GENERAL_REGS
|
||
+#define MODE_INDEX_REG_CLASS(MODE) \
|
||
+ (MODE_OK_FOR_INDEX_P (MODE) ? GENERAL_REGS : NO_REGS)
|
||
#define BASE_REG_CLASS ADDR_REGS
|
||
|
||
#define PREFERRED_RELOAD_CLASS(X,CLASS) \
|
||
@@ -644,7 +646,7 @@
|
||
(though the operand list is empty). */
|
||
#define TRANSFER_FROM_TRAMPOLINE \
|
||
void \
|
||
-__transfer_from_trampoline () \
|
||
+__transfer_from_trampoline (void) \
|
||
{ \
|
||
register char *a0 asm (M68K_STATIC_CHAIN_REG_NAME); \
|
||
asm (GLOBAL_ASM_OP "___trampoline"); \
|
||
@@ -675,6 +677,10 @@
|
||
#define HAVE_POST_INCREMENT 1
|
||
#define HAVE_PRE_DECREMENT 1
|
||
|
||
+/* Return true if addresses of mode MODE can have an index register. */
|
||
+#define MODE_OK_FOR_INDEX_P(MODE) \
|
||
+ (!TARGET_COLDFIRE_FPU || GET_MODE_CLASS (MODE) != MODE_FLOAT)
|
||
+
|
||
/* Macros to check register numbers against specific register classes. */
|
||
|
||
/* True for data registers, D0 through D7. */
|
||
@@ -689,9 +695,10 @@
|
||
/* True for floating point registers, FP0 through FP7. */
|
||
#define FP_REGNO_P(REGNO) IN_RANGE (REGNO, 16, 23)
|
||
|
||
-#define REGNO_OK_FOR_INDEX_P(REGNO) \
|
||
- (INT_REGNO_P (REGNO) \
|
||
- || INT_REGNO_P (reg_renumber[REGNO]))
|
||
+#define REGNO_MODE_OK_FOR_INDEX_P(REGNO, MODE) \
|
||
+ (MODE_OK_FOR_INDEX_P (MODE) \
|
||
+ && (INT_REGNO_P (REGNO) \
|
||
+ || INT_REGNO_P (reg_renumber[REGNO])))
|
||
|
||
#define REGNO_OK_FOR_BASE_P(REGNO) \
|
||
(ADDRESS_REGNO_P (REGNO) \
|
||
@@ -751,13 +758,14 @@
|
||
|
||
#define LEGITIMATE_PIC_OPERAND_P(X) \
|
||
(!symbolic_operand (X, VOIDmode) \
|
||
- || (TARGET_PCREL && REG_STRICT_P))
|
||
+ || (TARGET_PCREL && REG_STRICT_P) \
|
||
+ || m68k_tls_reference_p (X, true))
|
||
|
||
#define REG_OK_FOR_BASE_P(X) \
|
||
m68k_legitimate_base_reg_p (X, REG_STRICT_P)
|
||
|
||
-#define REG_OK_FOR_INDEX_P(X) \
|
||
- m68k_legitimate_index_reg_p (X, REG_STRICT_P)
|
||
+#define REG_MODE_OK_FOR_INDEX_P(X, MODE) \
|
||
+ m68k_legitimate_index_reg_p (MODE, X, REG_STRICT_P)
|
||
|
||
#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
|
||
do \
|
||
@@ -770,52 +778,19 @@
|
||
/* This address is OK as it stands. */
|
||
#define PIC_CASE_VECTOR_ADDRESS(index) index
|
||
|
||
-/* For the 68000, we handle X+REG by loading X into a register R and
|
||
- using R+REG. R will go in an address reg and indexing will be used.
|
||
- However, if REG is a broken-out memory address or multiplication,
|
||
- nothing needs to be done because REG can certainly go in an address reg. */
|
||
-#define COPY_ONCE(Y) if (!copied) { Y = copy_rtx (Y); copied = ch = 1; }
|
||
-#define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN) \
|
||
-{ register int ch = (X) != (OLDX); \
|
||
- if (GET_CODE (X) == PLUS) \
|
||
- { int copied = 0; \
|
||
- if (GET_CODE (XEXP (X, 0)) == MULT) \
|
||
- { COPY_ONCE (X); XEXP (X, 0) = force_operand (XEXP (X, 0), 0);} \
|
||
- if (GET_CODE (XEXP (X, 1)) == MULT) \
|
||
- { COPY_ONCE (X); XEXP (X, 1) = force_operand (XEXP (X, 1), 0);} \
|
||
- if (ch && GET_CODE (XEXP (X, 1)) == REG \
|
||
- && GET_CODE (XEXP (X, 0)) == REG) \
|
||
- { if (TARGET_COLDFIRE_FPU \
|
||
- && GET_MODE_CLASS (MODE) == MODE_FLOAT) \
|
||
- { COPY_ONCE (X); X = force_operand (X, 0);} \
|
||
- goto WIN; } \
|
||
- if (ch) { GO_IF_LEGITIMATE_ADDRESS (MODE, X, WIN); } \
|
||
- if (GET_CODE (XEXP (X, 0)) == REG \
|
||
- || (GET_CODE (XEXP (X, 0)) == SIGN_EXTEND \
|
||
- && GET_CODE (XEXP (XEXP (X, 0), 0)) == REG \
|
||
- && GET_MODE (XEXP (XEXP (X, 0), 0)) == HImode)) \
|
||
- { register rtx temp = gen_reg_rtx (Pmode); \
|
||
- register rtx val = force_operand (XEXP (X, 1), 0); \
|
||
- emit_move_insn (temp, val); \
|
||
- COPY_ONCE (X); \
|
||
- XEXP (X, 1) = temp; \
|
||
- if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (MODE) == MODE_FLOAT \
|
||
- && GET_CODE (XEXP (X, 0)) == REG) \
|
||
- X = force_operand (X, 0); \
|
||
- goto WIN; } \
|
||
- else if (GET_CODE (XEXP (X, 1)) == REG \
|
||
- || (GET_CODE (XEXP (X, 1)) == SIGN_EXTEND \
|
||
- && GET_CODE (XEXP (XEXP (X, 1), 0)) == REG \
|
||
- && GET_MODE (XEXP (XEXP (X, 1), 0)) == HImode)) \
|
||
- { register rtx temp = gen_reg_rtx (Pmode); \
|
||
- register rtx val = force_operand (XEXP (X, 0), 0); \
|
||
- emit_move_insn (temp, val); \
|
||
- COPY_ONCE (X); \
|
||
- XEXP (X, 0) = temp; \
|
||
- if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (MODE) == MODE_FLOAT \
|
||
- && GET_CODE (XEXP (X, 1)) == REG) \
|
||
- X = force_operand (X, 0); \
|
||
- goto WIN; }}}
|
||
+#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) \
|
||
+do { \
|
||
+ rtx __x; \
|
||
+ \
|
||
+ __x = m68k_legitimize_address (X, OLDX, MODE); \
|
||
+ if (__x != NULL_RTX) \
|
||
+ { \
|
||
+ X = __x; \
|
||
+ \
|
||
+ if (memory_address_p (MODE, X)) \
|
||
+ goto WIN; \
|
||
+ } \
|
||
+} while (0)
|
||
|
||
/* On the 68000, only predecrement and postincrement address depend thus
|
||
(the amount of decrement or increment being the length of the operand).
|
||
@@ -1028,6 +1003,9 @@
|
||
assemble_name ((FILE), (NAME)), \
|
||
fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
|
||
|
||
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
|
||
+ m68k_final_prescan_insn (INSN, OPVEC, NOPERANDS)
|
||
+
|
||
/* On the 68000, we use several CODE characters:
|
||
'.' for dot needed in Motorola-style opcode names.
|
||
'-' for an operand pushing on the stack:
|
||
--- a/gcc/config/m68k/m68k.md
|
||
+++ b/gcc/config/m68k/m68k.md
|
||
@@ -116,7 +116,8 @@
|
||
(UNSPEC_GOT 3)
|
||
(UNSPEC_IB 4)
|
||
(UNSPEC_TIE 5)
|
||
- (UNSPEC_GOTOFF 6)
|
||
+ (UNSPEC_RELOC16 6)
|
||
+ (UNSPEC_RELOC32 7)
|
||
])
|
||
|
||
;; UNSPEC_VOLATILE usage:
|
||
@@ -414,7 +415,7 @@
|
||
|
||
(define_insn "tst<mode>_cf"
|
||
[(set (cc0)
|
||
- (match_operand:FP 0 "general_operand" "f<FP:dreg><Q>U"))]
|
||
+ (match_operand:FP 0 "general_operand" "f<FP:dreg>m"))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
cc_status.flags = CC_IN_68881;
|
||
@@ -570,8 +571,8 @@
|
||
|
||
(define_insn "*cmp<mode>_cf"
|
||
[(set (cc0)
|
||
- (compare (match_operand:FP 0 "fp_src_operand" "f,f,<FP:dreg><Q>U")
|
||
- (match_operand:FP 1 "fp_src_operand" "f,<FP:dreg><Q>U,f")))]
|
||
+ (compare (match_operand:FP 0 "fp_src_operand" "f,f,<FP:dreg>m")
|
||
+ (match_operand:FP 1 "fp_src_operand" "f,<FP:dreg>m,f")))]
|
||
"TARGET_COLDFIRE_FPU
|
||
&& (register_operand (operands[0], <MODE>mode)
|
||
|| register_operand (operands[1], <MODE>mode))"
|
||
@@ -779,7 +780,41 @@
|
||
{
|
||
rtx tmp, base, offset;
|
||
|
||
- if (flag_pic && !TARGET_PCREL && symbolic_operand (operands[1], SImode))
|
||
+ /* Recognize the case where operand[1] is a reference to thread-local
|
||
+ data and load its address to a register. */
|
||
+ if (!TARGET_PCREL && m68k_tls_reference_p (operands[1], false))
|
||
+ {
|
||
+ rtx tmp = operands[1];
|
||
+ rtx addend = NULL;
|
||
+
|
||
+ if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
|
||
+ {
|
||
+ addend = XEXP (XEXP (tmp, 0), 1);
|
||
+ tmp = XEXP (XEXP (tmp, 0), 0);
|
||
+ }
|
||
+
|
||
+ gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
|
||
+ gcc_assert (SYMBOL_REF_TLS_MODEL (tmp) != 0);
|
||
+
|
||
+ tmp = m68k_legitimize_tls_address (tmp);
|
||
+
|
||
+ if (addend)
|
||
+ {
|
||
+ if (!REG_P (tmp))
|
||
+ {
|
||
+ rtx reg;
|
||
+
|
||
+ reg = gen_reg_rtx (Pmode);
|
||
+ emit_move_insn (reg, tmp);
|
||
+ tmp = reg;
|
||
+ }
|
||
+
|
||
+ tmp = gen_rtx_PLUS (SImode, tmp, addend);
|
||
+ }
|
||
+
|
||
+ operands[1] = tmp;
|
||
+ }
|
||
+ else if (flag_pic && !TARGET_PCREL && symbolic_operand (operands[1], SImode))
|
||
{
|
||
/* The source is an address which requires PIC relocation.
|
||
Call legitimize_pic_address with the source, mode, and a relocation
|
||
@@ -1070,10 +1105,8 @@
|
||
;; SFmode MEMs are restricted to modes 2-4 if TARGET_COLDFIRE_FPU.
|
||
;; The move instructions can handle all combinations.
|
||
(define_insn "movsf_cf_hard"
|
||
- [(set (match_operand:SF 0 "nonimmediate_operand" "=r<Q>U, f, f,mr,f,r<Q>,f
|
||
-,m")
|
||
- (match_operand:SF 1 "general_operand" " f, r<Q>U,f,rm,F,F, m
|
||
-,f"))]
|
||
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,f, f,rm,f,r<Q>,f,m")
|
||
+ (match_operand:SF 1 "general_operand" " f, rm,f,rm,F,F, m,f"))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (which_alternative == 4 || which_alternative == 5) {
|
||
@@ -1215,8 +1248,8 @@
|
||
})
|
||
|
||
(define_insn "movdf_cf_hard"
|
||
- [(set (match_operand:DF 0 "nonimmediate_operand" "=f, <Q>U,r,f,r,r,m,f")
|
||
- (match_operand:DF 1 "general_operand" " f<Q>U,f, f,r,r,m,r,E"))]
|
||
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f, m,r,f,r,r,m,f")
|
||
+ (match_operand:DF 1 "general_operand" " fm,f,f,r,r,m,r,E"))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
rtx xoperands[3];
|
||
@@ -1857,7 +1890,7 @@
|
||
(define_insn "extendsfdf2_cf"
|
||
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,f")
|
||
(float_extend:DF
|
||
- (match_operand:SF 1 "general_operand" "f,<Q>U")))]
|
||
+ (match_operand:SF 1 "general_operand" "f,m")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (FP_REG_P (operands[0]) && FP_REG_P (operands[1]))
|
||
@@ -1897,9 +1930,9 @@
|
||
})
|
||
|
||
(define_insn "truncdfsf2_cf"
|
||
- [(set (match_operand:SF 0 "nonimmediate_operand" "=f,d<Q>U")
|
||
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,dm")
|
||
(float_truncate:SF
|
||
- (match_operand:DF 1 "general_operand" "<Q>U,f")))]
|
||
+ (match_operand:DF 1 "general_operand" "m,f")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
"@
|
||
fsmove%.d %1,%0
|
||
@@ -2045,7 +2078,7 @@
|
||
|
||
(define_insn "ftrunc<mode>2_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f")
|
||
- (fix:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U")))]
|
||
+ (fix:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (FP_REG_P (operands[1]))
|
||
@@ -2338,9 +2371,9 @@
|
||
"* return output_addsi3 (operands);")
|
||
|
||
(define_insn_and_split "*addsi3_5200"
|
||
- [(set (match_operand:SI 0 "nonimmediate_operand" "=mr,mr,a,m,r, ?a, ?a,?a,?a")
|
||
- (plus:SI (match_operand:SI 1 "general_operand" "%0, 0, 0,0,0, a, a, r, a")
|
||
- (match_operand:SI 2 "general_src_operand" " I, L, J,d,mrKi,Cj, r, a, J")))]
|
||
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mr,mr,a, m,r, ?a, ?a,?a,?a")
|
||
+ (plus:SI (match_operand:SI 1 "general_operand" "%0, 0, 0, 0,0, a, a, r, a")
|
||
+ (match_operand:SI 2 "general_src_operand" " I, L, JCu,d,mrKi,Cj, r, a, JCu")))]
|
||
"TARGET_COLDFIRE"
|
||
{
|
||
switch (which_alternative)
|
||
@@ -2382,9 +2415,9 @@
|
||
(plus:SI (match_dup 0)
|
||
(match_dup 1)))]
|
||
""
|
||
- [(set_attr "type" "aluq_l,aluq_l,lea,alu_l,alu_l,*,lea,lea,lea")
|
||
- (set_attr "opy" "2,2,*,2,2,*,*,*,*")
|
||
- (set_attr "opy_type" "*,*,mem5,*,*,*,mem6,mem6,mem5")])
|
||
+ [(set_attr "type" "aluq_l,aluq_l,lea, alu_l,alu_l,*,lea, lea, lea")
|
||
+ (set_attr "opy" "2, 2, *, 2, 2, *,*, *, *")
|
||
+ (set_attr "opy_type" "*, *, mem5,*, *, *,mem6,mem6,mem5")])
|
||
|
||
(define_insn ""
|
||
[(set (match_operand:SI 0 "nonimmediate_operand" "=a")
|
||
@@ -2666,7 +2699,7 @@
|
||
(define_insn "add<mode>3_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f")
|
||
(plus:FP (match_operand:FP 1 "general_operand" "%0")
|
||
- (match_operand:FP 2 "general_operand" "f<FP:dreg><Q>U")))]
|
||
+ (match_operand:FP 2 "general_operand" "f<FP:dreg>m")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (FP_REG_P (operands[2]))
|
||
@@ -2889,7 +2922,7 @@
|
||
(define_insn "sub<mode>3_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f")
|
||
(minus:FP (match_operand:FP 1 "general_operand" "0")
|
||
- (match_operand:FP 2 "general_operand" "f<FP:dreg><Q>U")))]
|
||
+ (match_operand:FP 2 "general_operand" "f<FP:dreg>m")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (FP_REG_P (operands[2]))
|
||
@@ -3245,7 +3278,7 @@
|
||
(define_insn "fmul<mode>3_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f")
|
||
(mult:FP (match_operand:FP 1 "general_operand" "%0")
|
||
- (match_operand:FP 2 "general_operand" "f<Q>U<FP:dreg>")))]
|
||
+ (match_operand:FP 2 "general_operand" "fm<FP:dreg>")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (FP_REG_P (operands[2]))
|
||
@@ -3315,7 +3348,7 @@
|
||
(define_insn "div<mode>3_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f")
|
||
(div:FP (match_operand:FP 1 "general_operand" "0")
|
||
- (match_operand:FP 2 "general_operand" "f<Q>U<FP:dreg>")))]
|
||
+ (match_operand:FP 2 "general_operand" "fm<FP:dreg>")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (FP_REG_P (operands[2]))
|
||
@@ -4163,7 +4196,7 @@
|
||
|
||
(define_insn "neg<mode>2_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f,d")
|
||
- (neg:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U,0")))]
|
||
+ (neg:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m,0")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (DATA_REG_P (operands[0]))
|
||
@@ -4197,7 +4230,7 @@
|
||
|
||
(define_insn "sqrt<mode>2_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f")
|
||
- (sqrt:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U")))]
|
||
+ (sqrt:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (FP_REG_P (operands[1]))
|
||
@@ -4316,7 +4349,7 @@
|
||
|
||
(define_insn "abs<mode>2_cf"
|
||
[(set (match_operand:FP 0 "nonimmediate_operand" "=f,d")
|
||
- (abs:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U,0")))]
|
||
+ (abs:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m,0")))]
|
||
"TARGET_COLDFIRE_FPU"
|
||
{
|
||
if (DATA_REG_P (operands[0]))
|
||
--- a/gcc/config/m68k/m68k.opt
+++ b/gcc/config/m68k/m68k.opt
@@ -182,3 +182,7 @@
mxgot
Target Report Mask(XGOT)
Support more than 8192 GOT entries on ColdFire
+
+mxtls
+Target Report Mask(XTLS)
+Support TLS segment larger than 64K
--- a/gcc/config/m68k/m68k-protos.h
|
||
+++ b/gcc/config/m68k/m68k-protos.h
|
||
@@ -54,19 +54,27 @@
|
||
extern bool m68k_output_addr_const_extra (FILE *, rtx);
|
||
extern void notice_update_cc (rtx, rtx);
|
||
extern bool m68k_legitimate_base_reg_p (rtx, bool);
|
||
-extern bool m68k_legitimate_index_reg_p (rtx, bool);
|
||
+extern bool m68k_legitimate_index_reg_p (enum machine_mode, rtx, bool);
|
||
extern bool m68k_illegitimate_symbolic_constant_p (rtx);
|
||
extern bool m68k_legitimate_address_p (enum machine_mode, rtx, bool);
|
||
extern bool m68k_matches_q_p (rtx);
|
||
extern bool m68k_matches_u_p (rtx);
|
||
extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
|
||
+extern rtx m68k_legitimize_tls_address (rtx);
|
||
+extern bool m68k_tls_reference_p (rtx, bool);
|
||
+extern rtx m68k_legitimize_address (rtx, rtx, enum machine_mode);
|
||
extern int valid_dbcc_comparison_p_2 (rtx, enum machine_mode);
|
||
extern rtx m68k_libcall_value (enum machine_mode);
|
||
extern rtx m68k_function_value (const_tree, const_tree);
|
||
extern int emit_move_sequence (rtx *, enum machine_mode, rtx);
|
||
extern bool m68k_movem_pattern_p (rtx, rtx, HOST_WIDE_INT, bool);
|
||
extern const char *m68k_output_movem (rtx *, rtx, HOST_WIDE_INT, bool);
|
||
+extern void m68k_final_prescan_insn (rtx, rtx *, int);
|
||
|
||
+/* Functions from m68k.c used in constraints.md. */
|
||
+extern rtx m68k_unwrap_symbol (rtx, bool);
|
||
+
|
||
+/* Functions from m68k.c used in genattrtab. */
|
||
#ifdef HAVE_ATTR_cpu
|
||
extern enum attr_cpu m68k_sched_cpu;
|
||
extern enum attr_mac m68k_sched_mac;
|
||
--- a/gcc/config/m68k/predicates.md
+++ b/gcc/config/m68k/predicates.md
@@ -130,7 +130,9 @@
  (match_code "sign_extend,zero_extend"))

;; Returns true if OP is either a symbol reference or a sum of a
-;; symbol reference and a constant.
+;; symbol reference and a constant.  This predicate is for "raw"
+;; symbol references not yet processed by legitimize*_address,
+;; hence we do not handle UNSPEC_{XGOT, TLS, XTLS} here.

(define_predicate "symbolic_operand"
  (match_code "symbol_ref,label_ref,const")
--- a/gcc/config/m68k/t-uclinux
+++ b/gcc/config/m68k/t-uclinux
@@ -1,8 +1,8 @@
# crti and crtn are provided by uClibc.
EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o

-# Only include multilibs for the 68020 and for CPUs without an MMU.
-M68K_MLIB_CPU += && (MLIB == "68020" || !match(FLAGS, "FL_MMU"))
+# Include multilibs for CPUs without an MMU or with FL_UCLINUX
+M68K_MLIB_CPU += && (!match(FLAGS, "FL_MMU") || match(FLAGS, "FL_UCLINUX"))

# Add multilibs for execute-in-place and shared-library code.
M68K_MLIB_OPTIONS += msep-data/mid-shared-library
--- a/gcc/config/mips/74k.md
|
||
+++ b/gcc/config/mips/74k.md
|
||
@@ -118,8 +118,7 @@
|
||
;; stores
|
||
(define_insn_reservation "r74k_int_store" 1
|
||
(and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
|
||
- (and (eq_attr "type" "store")
|
||
- (eq_attr "mode" "!unknown")))
|
||
+ (eq_attr "type" "store"))
|
||
"r74k_agen")
|
||
|
||
|
||
@@ -145,33 +144,123 @@
|
||
;; load->load base: 4 cycles
|
||
;; load->store base: 4 cycles
|
||
(define_bypass 4 "r74k_int_load" "r74k_int_load")
|
||
-(define_bypass 4 "r74k_int_load" "r74k_int_store" "!store_data_bypass_p")
|
||
+(define_bypass 4 "r74k_int_load" "r74k_int_store" "!mips_store_data_bypass_p")
|
||
|
||
;; logical/move/slt/signext->next use : 1 cycles (Default)
|
||
;; logical/move/slt/signext->load base: 2 cycles
|
||
;; logical/move/slt/signext->store base: 2 cycles
|
||
(define_bypass 2 "r74k_int_logical" "r74k_int_load")
|
||
-(define_bypass 2 "r74k_int_logical" "r74k_int_store" "!store_data_bypass_p")
|
||
+(define_bypass 2 "r74k_int_logical" "r74k_int_store"
|
||
+ "!mips_store_data_bypass_p")
|
||
|
||
;; arith->next use : 2 cycles (Default)
|
||
;; arith->load base: 3 cycles
|
||
;; arith->store base: 3 cycles
|
||
(define_bypass 3 "r74k_int_arith" "r74k_int_load")
|
||
-(define_bypass 3 "r74k_int_arith" "r74k_int_store" "!store_data_bypass_p")
|
||
+(define_bypass 3 "r74k_int_arith" "r74k_int_store" "!mips_store_data_bypass_p")
|
||
|
||
;; cmove->next use : 4 cycles (Default)
|
||
;; cmove->load base: 5 cycles
|
||
;; cmove->store base: 5 cycles
|
||
(define_bypass 5 "r74k_int_cmove" "r74k_int_load")
|
||
-(define_bypass 5 "r74k_int_cmove" "r74k_int_store" "!store_data_bypass_p")
|
||
+(define_bypass 5 "r74k_int_cmove" "r74k_int_store"
|
||
+ "!mips_store_data_bypass_p")
|
||
|
||
;; mult/madd/msub->int_mfhilo : 4 cycles (default)
|
||
;; mult->madd/msub : 1 cycles
|
||
;; madd/msub->madd/msub : 1 cycles
|
||
-(define_bypass 1 "r74k_int_mult,r74k_int_mul3" "r74k_int_madd"
|
||
- "mips_linked_madd_p")
|
||
-(define_bypass 1 "r74k_int_madd" "r74k_int_madd"
|
||
- "mips_linked_madd_p")
|
||
+(define_bypass 1 "r74k_int_mult" "r74k_int_madd")
|
||
+(define_bypass 1 "r74k_int_madd" "r74k_int_madd")
|
||
+
|
||
+(define_bypass 1 "r74k_int_mul3" "r74k_int_madd"
|
||
+ "mips_mult_madd_chain_bypass_p")
|
||
+
|
||
+
|
||
+;; --------------------------------------------------------------
|
||
+;; DSP instructions
|
||
+;; --------------------------------------------------------------
|
||
+
|
||
+;; Non-saturating insns have the same latency as normal ALU operations.
|
||
+(define_insn_reservation "r74k_dsp_alu" 2
|
||
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
|
||
+ (eq_attr "type" "dspalu"))
|
||
+ "r74k_alu")
|
||
+
|
||
+;; Saturating insns take an extra cycle.
|
||
+(define_insn_reservation "r74k_dsp_alu_sat" 3
|
||
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
|
||
+ (eq_attr "type" "dspalusat"))
|
||
+ "r74k_alu")
|
||
+
|
||
+;; dpaq_s, dpau, dpsq_s, dpsu, maq_s, mulsaq
|
||
+;; - delivers result to hi/lo in 6 cycle (bypass at M4)
|
||
+(define_insn_reservation "r74k_dsp_mac" 6
|
||
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
|
||
+ (eq_attr "type" "dspmac"))
|
||
+ "r74k_alu+r74k_mul")
|
||
+
|
||
+;; dpaq_sa, dpsq_sa, maq_sa
|
||
+;; - delivers result to hi/lo in 7 cycle (bypass at WB)
|
||
+(define_insn_reservation "r74k_dsp_mac_sat" 7
|
||
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
|
||
+ (eq_attr "type" "dspmacsat"))
|
||
+ "r74k_alu+r74k_mul")
|
||
+
|
||
+;; extp, extpdp, extpdpv, extpv, extr, extrv
|
||
+;; - same latency as "mul"
|
||
+(define_insn_reservation "r74k_dsp_acc_ext" 7
|
||
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
|
||
+ (eq_attr "type" "accext"))
|
||
+ "r74k_alu+r74k_mul")
|
||
+
|
||
+;; mthlip, shilo, shilov
|
||
+;; - same latency as "mul"
|
||
+(define_insn_reservation "r74k_dsp_acc_mod" 7
|
||
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
|
||
+ (eq_attr "type" "accmod"))
|
||
+ "r74k_alu+r74k_mul")
|
||
+
|
||
+;; dspalu ->load/store base
|
||
+;; dspalusat->load/store base
|
||
+;; - we should never see these in real life.
|
||
+
|
||
+;; dsp_mac->dsp_mac : 1 cycles (repeat rate of 1)
|
||
+;; dsp_mac->dsp_mac_sat : 1 cycles (repeat rate of 1)
|
||
+(define_bypass 1 "r74k_dsp_mac" "r74k_dsp_mac")
|
||
+(define_bypass 1 "r74k_dsp_mac" "r74k_dsp_mac_sat")
|
||
+
|
||
+;; dsp_mac_sat->dsp_mac_sat : 2 cycles (repeat rate of 2)
|
||
+;; dsp_mac_sat->dsp_mac : 2 cycles (repeat rate of 2)
|
||
+(define_bypass 2 "r74k_dsp_mac_sat" "r74k_dsp_mac_sat")
|
||
+(define_bypass 2 "r74k_dsp_mac_sat" "r74k_dsp_mac")
|
||
+
|
||
+(define_bypass 1 "r74k_int_mult" "r74k_dsp_mac")
|
||
+(define_bypass 1 "r74k_int_mult" "r74k_dsp_mac_sat")
|
||
+
|
||
+;; Before reload, all multiplies are registered as imul3 (which has a long
+;; latency).  We temporarily jig the latency such that the macc groups
+;; are scheduled closely together during the first scheduler pass.
|
||
+(define_bypass 1 "r74k_int_mul3" "r74k_dsp_mac"
|
||
+ "mips_mult_madd_chain_bypass_p")
|
||
+(define_bypass 1 "r74k_int_mul3" "r74k_dsp_mac_sat"
|
||
+ "mips_mult_madd_chain_bypass_p")
|
||
+
|
||
+;; Assuming the following is true (bypass at M4)
|
||
+;; AP AF AM MB M1 M2 M3 M4 WB GR GC
|
||
+;; AP AF AM MB M1 M2 M3 M4 WB GR GC
|
||
+;; dsp_mac->dsp_acc_ext : 4 cycles
|
||
+;; dsp_mac->dsp_acc_mod : 4 cycles
|
||
+(define_bypass 4 "r74k_dsp_mac" "r74k_dsp_acc_ext")
|
||
+(define_bypass 4 "r74k_dsp_mac" "r74k_dsp_acc_mod")
|
||
+
|
||
+;; Assuming the following is true (bypass at WB)
|
||
+;; AP AF AM MB M1 M2 M3 M4 WB GR GC
|
||
+;; AP AF AM MB M1 M2 M3 M4 WB GR GC
|
||
+;; dsp_mac_sat->dsp_acc_ext : 5 cycles
|
||
+;; dsp_mac_sat->dsp_acc_mod : 5 cycles
|
||
+(define_bypass 5 "r74k_dsp_mac_sat" "r74k_dsp_acc_ext")
|
||
+(define_bypass 5 "r74k_dsp_mac_sat" "r74k_dsp_acc_mod")
|
||
+
|
||
|
||
;; --------------------------------------------------------------
|
||
;; Floating Point Instructions
|
||
--- /dev/null
|
||
+++ b/gcc/config/mips/crtfastmath.c
|
||
@@ -0,0 +1,53 @@
|
||
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This file is part of GCC.
|
||
+
|
||
+ GCC is free software; you can redistribute it and/or modify it
|
||
+ under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation; either version 3, or (at your option)
|
||
+ any later version.
|
||
+
|
||
+ GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||
+ License for more details.
|
||
+
|
||
+ Under Section 7 of GPL version 3, you are granted additional
|
||
+ permissions described in the GCC Runtime Library Exception, version
|
||
+ 3.1, as published by the Free Software Foundation.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ and a copy of the GCC Runtime Library Exception along with this
|
||
+ program; see the files COPYING3 and COPYING.RUNTIME respectively.
|
||
+ If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+#ifdef __mips_hard_float
|
||
+
|
||
+/* flush denormalized numbers to zero */
|
||
+#define _FPU_FLUSH_TZ 0x1000000
|
||
+
|
||
+/* rounding control */
|
||
+#define _FPU_RC_NEAREST 0x0 /* RECOMMENDED */
|
||
+#define _FPU_RC_ZERO 0x1
|
||
+#define _FPU_RC_UP 0x2
|
||
+#define _FPU_RC_DOWN 0x3
|
||
+
|
||
+/* enable interrupts for IEEE exceptions */
|
||
+#define _FPU_IEEE 0x00000F80
|
||
+
|
||
+/* Macros for accessing the hardware control word. */
|
||
+#define _FPU_GETCW(cw) __asm__ ("cfc1 %0,$31" : "=r" (cw))
|
||
+#define _FPU_SETCW(cw) __asm__ ("ctc1 %0,$31" : : "r" (cw))
|
||
+
|
||
+static void __attribute__((constructor))
|
||
+set_fast_math (void)
|
||
+{
|
||
+ unsigned int fcr;
|
||
+
|
||
+ /* fastmath: flush to zero, round to nearest, ieee exceptions disabled */
|
||
+ fcr = _FPU_FLUSH_TZ | _FPU_RC_NEAREST;
|
||
+
|
||
+ _FPU_SETCW(fcr);
|
||
+}
|
||
+
|
||
+#endif /* __mips_hard_float */
|
||
--- a/gcc/config/mips/linux64.h
+++ b/gcc/config/mips/linux64.h
@@ -69,3 +69,9 @@
ieee_quad_format is the default, but let's put this here to make
sure nobody thinks we just forgot to set it to something else. */
#define MIPS_TFMODE_FORMAT mips_quad_format
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
--- a/gcc/config/mips/linux.h
+++ b/gcc/config/mips/linux.h
@@ -147,3 +147,17 @@
#define DRIVER_SELF_SPECS \
BASE_DRIVER_SELF_SPECS, \
LINUX_DRIVER_SELF_SPECS
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ /* __thread_support is not supported by uClibc. */ \
+ if (linux_uclibc) \
+ targetm.have_tls = 0; \
+} while (0)
--- a/gcc/config/mips/mips.c
|
||
+++ b/gcc/config/mips/mips.c
|
||
@@ -261,18 +261,29 @@
|
||
/* Likewise FPR X. */
|
||
unsigned int fmask;
|
||
|
||
- /* The number of GPRs and FPRs saved. */
|
||
+ /* Likewise doubleword accumulator X ($acX). */
|
||
+ unsigned int acc_mask;
|
||
+
|
||
+ /* The number of GPRs, FPRs, doubleword accumulators and COP0
|
||
+ registers saved. */
|
||
unsigned int num_gp;
|
||
unsigned int num_fp;
|
||
+ unsigned int num_acc;
|
||
+ unsigned int num_cop0_regs;
|
||
|
||
- /* The offset of the topmost GPR and FPR save slots from the top of
|
||
- the frame, or zero if no such slots are needed. */
|
||
+ /* The offset of the topmost GPR, FPR, accumulator and COP0-register
|
||
+ save slots from the top of the frame, or zero if no such slots are
|
||
+ needed. */
|
||
HOST_WIDE_INT gp_save_offset;
|
||
HOST_WIDE_INT fp_save_offset;
|
||
+ HOST_WIDE_INT acc_save_offset;
|
||
+ HOST_WIDE_INT cop0_save_offset;
|
||
|
||
/* Likewise, but giving offsets from the bottom of the frame. */
|
||
HOST_WIDE_INT gp_sp_offset;
|
||
HOST_WIDE_INT fp_sp_offset;
|
||
+ HOST_WIDE_INT acc_sp_offset;
|
||
+ HOST_WIDE_INT cop0_sp_offset;
|
||
|
||
/* The offset of arg_pointer_rtx from frame_pointer_rtx. */
|
||
HOST_WIDE_INT arg_pointer_offset;
|
||
@@ -310,6 +321,20 @@
|
||
/* True if we have emitted an instruction to initialize
|
||
mips16_gp_pseudo_rtx. */
|
||
bool initialized_mips16_gp_pseudo_p;
|
||
+
|
||
+ /* True if this is an interrupt handler. */
|
||
+ bool interrupt_handler_p;
|
||
+
|
||
+ /* True if this is an interrupt handler that uses shadow registers. */
|
||
+ bool use_shadow_register_set_p;
|
||
+
|
||
+ /* True if this is an interrupt handler that should keep interrupts
|
||
+ masked. */
|
||
+ bool keep_interrupts_masked_p;
|
||
+
|
||
+ /* True if this is an interrupt handler that should use DERET
|
||
+ instead of ERET. */
|
||
+ bool use_debug_exception_return_p;
|
||
};
|
||
|
||
/* Information about a single argument. */
|
||
@@ -542,9 +567,16 @@
|
||
ALL_REGS, ALL_REGS, ALL_REGS, ALL_REGS
|
||
};
|
||
|
||
+#ifdef CVMX_SHARED_BSS_FLAGS
|
||
+static tree octeon_handle_cvmx_shared_attribute (tree *, tree, tree, int, bool *);
|
||
+#endif
|
||
+
|
||
/* The value of TARGET_ATTRIBUTE_TABLE. */
|
||
const struct attribute_spec mips_attribute_table[] = {
|
||
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
||
+#ifdef CVMX_SHARED_BSS_FLAGS
|
||
+ { "cvmx_shared", 0, 0, true, false, false, octeon_handle_cvmx_shared_attribute },
|
||
+#endif
|
||
{ "long_call", 0, 0, false, true, true, NULL },
|
||
{ "far", 0, 0, false, true, true, NULL },
|
||
{ "near", 0, 0, false, true, true, NULL },
|
||
@@ -554,6 +586,11 @@
|
||
code generation but don't carry other semantics. */
|
||
{ "mips16", 0, 0, true, false, false, NULL },
|
||
{ "nomips16", 0, 0, true, false, false, NULL },
|
||
+ /* Allow functions to be specified as interrupt handlers */
|
||
+ { "interrupt", 0, 0, false, true, true, NULL },
|
||
+ { "use_shadow_register_set", 0, 0, false, true, true, NULL },
|
||
+ { "keep_interrupts_masked", 0, 0, false, true, true, NULL },
|
||
+ { "use_debug_exception_return", 0, 0, false, true, true, NULL },
|
||
{ NULL, 0, 0, false, false, false, NULL }
|
||
};
|
||
|
||
@@ -659,6 +696,11 @@
|
||
{ "74kx", PROCESSOR_74KF1_1, 33, 0 },
|
||
{ "74kf3_2", PROCESSOR_74KF3_2, 33, 0 },
|
||
|
||
+ { "1004kc", PROCESSOR_24KC, 33, 0 }, /* 1004K with MT/DSP. */
|
||
+ { "1004kf2_1", PROCESSOR_24KF2_1, 33, 0 },
|
||
+ { "1004kf", PROCESSOR_24KF2_1, 33, 0 },
|
||
+ { "1004kf1_1", PROCESSOR_24KF1_1, 33, 0 },
|
||
+
|
||
/* MIPS64 processors. */
|
||
{ "5kc", PROCESSOR_5KC, 64, 0 },
|
||
{ "5kf", PROCESSOR_5KF, 64, 0 },
|
||
@@ -1064,13 +1106,7 @@
|
||
DEFAULT_COSTS
|
||
},
|
||
{ /* XLR */
|
||
- /* Need to replace first five with the costs of calling the appropriate
|
||
- libgcc routine. */
|
||
- COSTS_N_INSNS (256), /* fp_add */
|
||
- COSTS_N_INSNS (256), /* fp_mult_sf */
|
||
- COSTS_N_INSNS (256), /* fp_mult_df */
|
||
- COSTS_N_INSNS (256), /* fp_div_sf */
|
||
- COSTS_N_INSNS (256), /* fp_div_df */
|
||
+ SOFT_FP_COSTS,
|
||
COSTS_N_INSNS (8), /* int_mult_si */
|
||
COSTS_N_INSNS (8), /* int_mult_di */
|
||
COSTS_N_INSNS (72), /* int_div_si */
|
||
@@ -1172,6 +1208,42 @@
|
||
return lookup_attribute ("nomips16", DECL_ATTRIBUTES (decl)) != NULL;
|
||
}
|
||
|
||
+/* Check if the interrupt attribute is set for a function. */
|
||
+
|
||
+static bool
|
||
+mips_interrupt_type_p (tree type)
|
||
+{
|
||
+ return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
|
||
+}
|
||
+
|
||
+/* Check if the attribute to use shadow register set is set for a function. */
|
||
+
|
||
+static bool
|
||
+mips_use_shadow_register_set_p (tree type)
|
||
+{
|
||
+ return lookup_attribute ("use_shadow_register_set",
|
||
+ TYPE_ATTRIBUTES (type)) != NULL;
|
||
+}
|
||
+
|
||
+/* Check if the attribute to keep interrupts masked is set for a function. */
|
||
+
|
||
+static bool
|
||
+mips_keep_interrupts_masked_p (tree type)
|
||
+{
|
||
+ return lookup_attribute ("keep_interrupts_masked",
|
||
+ TYPE_ATTRIBUTES (type)) != NULL;
|
||
+}
|
||
+
|
||
+/* Check if the attribute to use debug exception return is set for
|
||
+ a function. */
|
||
+
|
||
+static bool
|
||
+mips_use_debug_exception_return_p (tree type)
|
||
+{
|
||
+ return lookup_attribute ("use_debug_exception_return",
|
||
+ TYPE_ATTRIBUTES (type)) != NULL;
|
||
+}
|
||
+
|
||
/* Return true if function DECL is a MIPS16 function. Return the ambient
|
||
setting if DECL is null. */
|
||
|
||
@@ -2795,7 +2867,7 @@
|
||
mips_legitimize_address (rtx *xloc, enum machine_mode mode)
|
||
{
|
||
rtx base, addr;
|
||
- HOST_WIDE_INT offset;
|
||
+ HOST_WIDE_INT intval, high, offset;
|
||
|
||
if (mips_tls_symbol_p (*xloc))
|
||
{
|
||
@@ -2820,6 +2892,32 @@
|
||
*xloc = mips_force_address (addr, mode);
|
||
return true;
|
||
}
|
||
+
|
||
+ /* Handle references to constant addresses by loading the high part
|
||
+ into a register and using an offset for the low part. */
|
||
+ if (GET_CODE (base) == CONST_INT)
|
||
+ {
|
||
+ intval = INTVAL (base);
|
||
+ high = trunc_int_for_mode (CONST_HIGH_PART (intval), Pmode);
|
||
+ offset = CONST_LOW_PART (intval);
|
||
+ /* Ignore cases in which a positive address would be accessed by a
|
||
+ negative offset from a negative address. The required wraparound
|
||
+ does not occur for 32-bit addresses on 64-bit targets, and it is
|
||
+ very unlikely that such an access would occur in real code anyway.
|
||
+
|
||
+ If the low offset is not legitimate for MODE, prefer to load
|
||
+ the constant normally, instead of using mips_force_address on
|
||
+ the legitimized address. The latter option would cause us to
|
||
+ use (D)ADDIU unconditionally, but LUI/ORI is more efficient
|
||
+ than LUI/ADDIU on some targets. */
|
||
+ if ((intval < 0 || high > 0)
|
||
+ && mips_valid_offset_p (GEN_INT (offset), mode))
|
||
+ {
|
||
+ base = mips_force_temporary (NULL, GEN_INT (high));
|
||
+ *xloc = plus_constant (base, offset);
|
||
+ return true;
|
||
+ }
|
||
+ }
|
||
return false;
|
||
}
|
||
|
||
@@ -6188,6 +6286,11 @@
|
||
if (!TARGET_SIBCALLS)
|
||
return false;
|
||
|
||
+ /* Interrupt handlers need special epilogue code and therefore can't
|
||
+ use sibcalls. */
|
||
+ if (mips_interrupt_type_p (TREE_TYPE (current_function_decl)))
|
||
+ return false;
|
||
+
|
||
/* We can't do a sibcall if the called function is a MIPS16 function
|
||
because there is no direct "jx" instruction equivalent to "jalx" to
|
||
switch the ISA mode. We only care about cases where the sibling
|
||
@@ -6608,6 +6711,15 @@
|
||
if (!mips_get_unaligned_mem (&src, width, bitpos, &left, &right))
|
||
return false;
|
||
|
||
+ if (ISA_HAS_UL_US)
|
||
+ {
|
||
+ if (GET_MODE (dest) == DImode)
|
||
+ emit_insn (gen_mov_uld (dest, src, left));
|
||
+ else
|
||
+ emit_insn (gen_mov_ulw (dest, src, left));
|
||
+ return true;
|
||
+ }
|
||
+
|
||
temp = gen_reg_rtx (GET_MODE (dest));
|
||
if (GET_MODE (dest) == DImode)
|
||
{
|
||
@@ -6642,6 +6754,16 @@
|
||
|
||
mode = mode_for_size (width, MODE_INT, 0);
|
||
src = gen_lowpart (mode, src);
|
||
+
|
||
+ if (ISA_HAS_UL_US)
|
||
+ {
|
||
+ if (GET_MODE (src) == DImode)
|
||
+ emit_insn (gen_mov_usd (dest, src, left));
|
||
+ else
|
||
+ emit_insn (gen_mov_usw (dest, src, left));
|
||
+ return true;
|
||
+ }
|
||
+
|
||
if (mode == DImode)
|
||
{
|
||
emit_insn (gen_mov_sdl (dest, src, left));
|
||
@@ -7229,7 +7351,11 @@
|
||
|| (letter == 'L' && TARGET_BIG_ENDIAN)
|
||
|| letter == 'D')
|
||
regno++;
|
||
- fprintf (file, "%s", reg_names[regno]);
|
||
+ /* We need to print $0 .. $31 for COP0 registers. */
|
||
+ if (COP0_REG_P (regno))
|
||
+ fprintf (file, "$%s", ®_names[regno][4]);
|
||
+ else
|
||
+ fprintf (file, "%s", reg_names[regno]);
|
||
}
|
||
break;
|
||
|
||
@@ -7369,6 +7495,12 @@
|
||
if (TARGET_ABICALLS || TARGET_VXWORKS_RTP)
|
||
return false;
|
||
|
||
+#ifdef CVMX_SHARED_BSS_FLAGS
|
||
+ if (TARGET_OCTEON && TREE_CODE (decl) == VAR_DECL
|
||
+ && lookup_attribute ("cvmx_shared", DECL_ATTRIBUTES (decl)))
|
||
+ return false;
|
||
+#endif
|
||
+
|
||
if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
|
||
{
|
||
const char *name;
|
||
@@ -7595,6 +7727,37 @@
|
||
return NULL_RTX;
|
||
}
|
||
|
||
+/* DSP ALU can bypass data with no delays for the following pairs. */
|
||
+enum insn_code dspalu_bypass_table[][2] =
|
||
+{
|
||
+ {CODE_FOR_mips_addsc, CODE_FOR_mips_addwc},
|
||
+ {CODE_FOR_mips_cmpu_eq_qb, CODE_FOR_mips_pick_qb},
|
||
+ {CODE_FOR_mips_cmpu_lt_qb, CODE_FOR_mips_pick_qb},
|
||
+ {CODE_FOR_mips_cmpu_le_qb, CODE_FOR_mips_pick_qb},
|
||
+ {CODE_FOR_mips_cmp_eq_ph, CODE_FOR_mips_pick_ph},
|
||
+ {CODE_FOR_mips_cmp_lt_ph, CODE_FOR_mips_pick_ph},
|
||
+ {CODE_FOR_mips_cmp_le_ph, CODE_FOR_mips_pick_ph},
|
||
+ {CODE_FOR_mips_wrdsp, CODE_FOR_mips_insv}
|
||
+};
|
||
+
|
||
+int
|
||
+mips_dspalu_bypass_p (rtx out_insn, rtx in_insn)
|
||
+{
|
||
+ int i;
|
||
+ int num_bypass = (sizeof (dspalu_bypass_table)
|
||
+ / (2 * sizeof (enum insn_code)));
|
||
+ enum insn_code out_icode = INSN_CODE (out_insn);
|
||
+ enum insn_code in_icode = INSN_CODE (in_insn);
|
||
+
|
||
+ for (i = 0; i < num_bypass; i++)
|
||
+ {
|
||
+ if (out_icode == dspalu_bypass_table[i][0]
|
||
+ && in_icode == dspalu_bypass_table[i][1])
|
||
+ return true;
|
||
+ }
|
||
+
|
||
+ return false;
|
||
+}
|
||
/* Implement ASM_OUTPUT_ASCII. */
|
||
|
||
void
|
||
@@ -7819,11 +7982,19 @@
|
||
"\t.previous\n", TARGET_LONG64 ? 64 : 32);
|
||
|
||
#ifdef HAVE_AS_GNU_ATTRIBUTE
|
||
+#ifdef TARGET_MIPS_SDEMTK
|
||
+ fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
|
||
+ (!TARGET_NO_FLOAT
|
||
+ ? (TARGET_HARD_FLOAT
|
||
+ ? (TARGET_DOUBLE_FLOAT
|
||
+ ? ((!TARGET_64BIT && TARGET_FLOAT64) ? 4 : 1) : 2) : 3) : 0));
|
||
+#else
|
||
fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
|
||
(TARGET_HARD_FLOAT_ABI
|
||
? (TARGET_DOUBLE_FLOAT
|
||
? ((!TARGET_64BIT && TARGET_FLOAT64) ? 4 : 1) : 2) : 3));
|
||
#endif
|
||
+#endif
|
||
}
|
||
|
||
/* If TARGET_ABICALLS, tell GAS to generate -KPIC code. */
|
||
@@ -8436,12 +8607,53 @@
|
||
return GLOBAL_POINTER_REGNUM;
|
||
}
|
||
|
||
+/* Return true if REGNO is a register that is ordinarily call-clobbered
|
||
+ but must nevertheless be preserved by an interrupt handler. */
|
||
+
|
||
+static bool
|
||
+mips_interrupt_extra_call_saved_reg_p (unsigned int regno)
|
||
+{
|
||
+ if (MD_REG_P (regno))
|
||
+ return true;
|
||
+
|
||
+ if (TARGET_DSP && DSP_ACC_REG_P (regno))
|
||
+ return true;
|
||
+
|
||
+ if (GP_REG_P (regno) && !cfun->machine->use_shadow_register_set_p)
|
||
+ {
|
||
+ /* $0 is hard-wired. */
|
||
+ if (regno == GP_REG_FIRST)
|
||
+ return false;
|
||
+
|
||
+ /* The interrupt handler can treat kernel registers as
|
||
+ scratch registers. */
|
||
+ if (KERNEL_REG_P (regno))
|
||
+ return false;
|
||
+
|
||
+ /* The function will return the stack pointer to its original value
|
||
+ anyway. */
|
||
+ if (regno == STACK_POINTER_REGNUM)
|
||
+ return false;
|
||
+
|
||
+ /* Otherwise, return true for registers that aren't ordinarily
|
||
+ call-clobbered. */
|
||
+ return call_really_used_regs[regno];
|
||
+ }
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
/* Return true if the current function should treat register REGNO
|
||
as call-saved. */
|
||
|
||
static bool
|
||
mips_cfun_call_saved_reg_p (unsigned int regno)
|
||
{
|
||
+ /* Interrupt handlers need to save extra registers. */
|
||
+ if (cfun->machine->interrupt_handler_p
|
||
+ && mips_interrupt_extra_call_saved_reg_p (regno))
|
||
+ return true;
|
||
+
|
||
/* call_insns preserve $28 unless they explicitly say otherwise,
|
||
so call_really_used_regs[] treats $28 as call-saved. However,
|
||
we want the ABI property rather than the default call_insn
|
||
@@ -8490,6 +8702,13 @@
|
||
if (regno == GP_REG_FIRST + 31 && mips16_cfun_returns_in_fpr_p ())
|
||
return true;
|
||
|
||
+ /* If REGNO is ordinarily call-clobbered, we must assume that any
|
||
+ called function could modify it. */
|
||
+ if (cfun->machine->interrupt_handler_p
|
||
+ && !current_function_is_leaf
|
||
+ && mips_interrupt_extra_call_saved_reg_p (regno))
|
||
+ return true;
|
||
+
|
||
return false;
|
||
}
|
||
|
||
@@ -8545,6 +8764,14 @@
|
||
C | callee-allocated save area |
|
||
| for register varargs |
|
||
| |
|
||
+ +-------------------------------+ <-- frame_pointer_rtx
|
||
+ | | + cop0_sp_offset
|
||
+ | COP0 reg save area | + UNITS_PER_WORD
|
||
+ | |
|
||
+ +-------------------------------+ <-- frame_pointer_rtx + acc_sp_offset
|
||
+ | | + UNITS_PER_WORD
|
||
+ | accumulator save area |
|
||
+ | |
|
||
+-------------------------------+ <-- frame_pointer_rtx + fp_sp_offset
|
||
| | + UNITS_PER_HWFPVALUE
|
||
| FPR save area |
|
||
@@ -8588,6 +8815,28 @@
|
||
HOST_WIDE_INT offset, size;
|
||
unsigned int regno, i;
|
||
|
||
+ /* Set this function's interrupt properties. */
|
||
+ if (mips_interrupt_type_p (TREE_TYPE (current_function_decl)))
|
||
+ {
|
||
+ if (!ISA_MIPS32R2)
|
||
+ error ("the %<interrupt%> attribute requires a MIPS32r2 processor");
|
||
+ else if (TARGET_HARD_FLOAT)
|
||
+ error ("the %<interrupt%> attribute requires %<-msoft-float%>");
|
||
+ else if (TARGET_MIPS16)
|
||
+ error ("interrupt handlers cannot be MIPS16 functions");
|
||
+ else
|
||
+ {
|
||
+ cfun->machine->interrupt_handler_p = true;
|
||
+ cfun->machine->use_shadow_register_set_p =
|
||
+ mips_use_shadow_register_set_p (TREE_TYPE (current_function_decl));
|
||
+ cfun->machine->keep_interrupts_masked_p =
|
||
+ mips_keep_interrupts_masked_p (TREE_TYPE (current_function_decl));
|
||
+ cfun->machine->use_debug_exception_return_p =
|
||
+ mips_use_debug_exception_return_p (TREE_TYPE
|
||
+ (current_function_decl));
|
||
+ }
|
||
+ }
|
||
+
|
||
frame = &cfun->machine->frame;
|
||
memset (frame, 0, sizeof (*frame));
|
||
size = get_frame_size ();
|
||
@@ -8657,7 +8906,7 @@
|
||
}
|
||
|
||
/* Find out which FPRs we need to save. This loop must iterate over
|
||
- the same space as its companion in mips_for_each_saved_reg. */
|
||
+ the same space as its companion in mips_for_each_saved_gpr_and_fpr. */
|
||
if (TARGET_HARD_FLOAT)
|
||
for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno += MAX_FPRS_PER_FMT)
|
||
if (mips_save_reg_p (regno))
|
||
@@ -8673,6 +8922,47 @@
|
||
frame->fp_sp_offset = offset - UNITS_PER_HWFPVALUE;
|
||
}
|
||
|
||
+ /* Add in space for the interrupt context information. */
|
||
+ if (cfun->machine->interrupt_handler_p)
|
||
+ {
|
||
+ /* Check HI/LO. */
|
||
+ if (mips_save_reg_p (LO_REGNUM) || mips_save_reg_p (HI_REGNUM))
|
||
+ {
|
||
+ frame->num_acc++;
|
||
+ frame->acc_mask |= (1 << 0);
|
||
+ }
|
||
+
|
||
+ /* Check accumulators 1, 2, 3. */
|
||
+ for (i = DSP_ACC_REG_FIRST; i <= DSP_ACC_REG_LAST; i += 2)
|
||
+ if (mips_save_reg_p (i) || mips_save_reg_p (i + 1))
|
||
+ {
|
||
+ frame->num_acc++;
|
||
+ frame->acc_mask |= 1 << (((i - DSP_ACC_REG_FIRST) / 2) + 1);
|
||
+ }
|
||
+
|
||
+ /* All interrupt context functions need space to preserve STATUS. */
|
||
+ frame->num_cop0_regs++;
|
||
+
|
||
+ /* If we don't keep interrupts masked, we need to save EPC. */
|
||
+ if (!cfun->machine->keep_interrupts_masked_p)
|
||
+ frame->num_cop0_regs++;
|
||
+ }
|
||
+
|
||
+ /* Move above the accumulator save area. */
|
||
+ if (frame->num_acc > 0)
|
||
+ {
|
||
+ /* Each accumulator needs 2 words. */
|
||
+ offset += frame->num_acc * 2 * UNITS_PER_WORD;
|
||
+ frame->acc_sp_offset = offset - UNITS_PER_WORD;
|
||
+ }
|
||
+
|
||
+ /* Move above the COP0 register save area. */
|
||
+ if (frame->num_cop0_regs > 0)
|
||
+ {
|
||
+ offset += frame->num_cop0_regs * UNITS_PER_WORD;
|
||
+ frame->cop0_sp_offset = offset - UNITS_PER_WORD;
|
||
+ }
|
||
+
|
||
/* Move above the callee-allocated varargs save area. */
|
||
offset += MIPS_STACK_ALIGN (cfun->machine->varargs_size);
|
||
frame->arg_pointer_offset = offset;
|
||
@@ -8686,6 +8976,10 @@
|
||
frame->gp_save_offset = frame->gp_sp_offset - offset;
|
||
if (frame->fp_sp_offset > 0)
|
||
frame->fp_save_offset = frame->fp_sp_offset - offset;
|
||
+ if (frame->acc_sp_offset > 0)
|
||
+ frame->acc_save_offset = frame->acc_sp_offset - offset;
|
||
+ if (frame->num_cop0_regs > 0)
|
||
+ frame->cop0_save_offset = frame->cop0_sp_offset - offset;
|
||
|
||
/* MIPS16 code offsets the frame pointer by the size of the outgoing
|
||
arguments. This tends to increase the chances of using unextended
|
||
@@ -8882,12 +9176,41 @@
|
||
fn (gen_rtx_REG (mode, regno), mem);
|
||
}
|
||
|
||
+/* Call FN for each accumulator that is saved by the current function.
|
||
+ SP_OFFSET is the offset of the current stack pointer from the start
|
||
+ of the frame. */
|
||
+
|
||
+static void
|
||
+mips_for_each_saved_acc (HOST_WIDE_INT sp_offset, mips_save_restore_fn fn)
|
||
+{
|
||
+ HOST_WIDE_INT offset;
|
||
+ int regno;
|
||
+
|
||
+ offset = cfun->machine->frame.acc_sp_offset - sp_offset;
|
||
+ if (BITSET_P (cfun->machine->frame.acc_mask, 0))
|
||
+ {
|
||
+ mips_save_restore_reg (word_mode, LO_REGNUM, offset, fn);
|
||
+ offset -= UNITS_PER_WORD;
|
||
+ mips_save_restore_reg (word_mode, HI_REGNUM, offset, fn);
|
||
+ offset -= UNITS_PER_WORD;
|
||
+ }
|
||
+
|
||
+ for (regno = DSP_ACC_REG_FIRST; regno <= DSP_ACC_REG_LAST; regno++)
|
||
+ if (BITSET_P (cfun->machine->frame.acc_mask,
|
||
+ ((regno - DSP_ACC_REG_FIRST) / 2) + 1))
|
||
+ {
|
||
+ mips_save_restore_reg (word_mode, regno, offset, fn);
|
||
+ offset -= UNITS_PER_WORD;
|
||
+ }
|
||
+}
|
||
+
|
||
/* Call FN for each register that is saved by the current function.
|
||
SP_OFFSET is the offset of the current stack pointer from the start
|
||
of the frame. */
|
||
|
||
static void
|
||
-mips_for_each_saved_reg (HOST_WIDE_INT sp_offset, mips_save_restore_fn fn)
|
||
+mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset,
|
||
+ mips_save_restore_fn fn)
|
||
{
|
||
enum machine_mode fpr_mode;
|
||
HOST_WIDE_INT offset;
|
||
@@ -9075,13 +9398,24 @@
|
||
}
|
||
else
|
||
{
|
||
- if (TARGET_MIPS16
|
||
- && REGNO (reg) != GP_REG_FIRST + 31
|
||
- && !M16_REG_P (REGNO (reg)))
|
||
- {
|
||
- /* Save a non-MIPS16 register by moving it through a temporary.
|
||
- We don't need to do this for $31 since there's a special
|
||
- instruction for it. */
|
||
+ if (REGNO (reg) == HI_REGNUM)
|
||
+ {
|
||
+ if (TARGET_64BIT)
|
||
+ emit_insn (gen_mfhidi_ti (MIPS_PROLOGUE_TEMP (DImode),
|
||
+ gen_rtx_REG (TImode, MD_REG_FIRST)));
|
||
+ else
|
||
+ emit_insn (gen_mfhisi_di (MIPS_PROLOGUE_TEMP (SImode),
|
||
+ gen_rtx_REG (DImode, MD_REG_FIRST)));
|
||
+ mips_emit_move (mem, MIPS_PROLOGUE_TEMP (GET_MODE (reg)));
|
||
+ }
|
||
+ else if ((TARGET_MIPS16
|
||
+ && REGNO (reg) != GP_REG_FIRST + 31
|
||
+ && !M16_REG_P (REGNO (reg)))
|
||
+ || ACC_REG_P (REGNO (reg)))
|
||
+ {
|
||
+ /* If the register has no direct store instruction, move it
|
||
+ through a temporary. Note that there's a special MIPS16
|
||
+ instruction to save $31. */
|
||
mips_emit_move (MIPS_PROLOGUE_TEMP (GET_MODE (reg)), reg);
|
||
mips_emit_move (mem, MIPS_PROLOGUE_TEMP (GET_MODE (reg)));
|
||
}
|
||
@@ -9153,6 +9487,14 @@
|
||
emit_insn (gen_loadgp_blockage ());
|
||
}
|
||
|
||
+/* A for_each_rtx callback. Stop the search if *X is a kernel register. */
|
||
+
|
||
+static int
|
||
+mips_kernel_reg_p (rtx *x, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ return GET_CODE (*x) == REG && KERNEL_REG_P (REGNO (*x));
|
||
+}
|
||
+
|
||
/* Expand the "prologue" pattern. */
|
||
|
||
void
|
||
@@ -9172,7 +9514,8 @@
|
||
/* Save the registers. Allocate up to MIPS_MAX_FIRST_STACK_STEP
|
||
bytes beforehand; this is enough to cover the register save area
|
||
without going out of range. */
|
||
- if ((frame->mask | frame->fmask) != 0)
|
||
+ if (((frame->mask | frame->fmask | frame->acc_mask) != 0)
|
||
+ || frame->num_cop0_regs > 0)
|
||
{
|
||
HOST_WIDE_INT step1;
|
||
|
||
@@ -9203,12 +9546,97 @@
|
||
}
|
||
else
|
||
{
|
||
- insn = gen_add3_insn (stack_pointer_rtx,
|
||
- stack_pointer_rtx,
|
||
- GEN_INT (-step1));
|
||
- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
|
||
- size -= step1;
|
||
- mips_for_each_saved_reg (size, mips_save_reg);
|
||
+ if (cfun->machine->interrupt_handler_p)
|
||
+ {
|
||
+ HOST_WIDE_INT offset;
|
||
+ rtx mem;
|
||
+
|
||
+ /* If this interrupt is using a shadow register set, we need to
|
||
+ get the stack pointer from the previous register set. */
|
||
+ if (cfun->machine->use_shadow_register_set_p)
|
||
+ emit_insn (gen_mips_rdpgpr (stack_pointer_rtx,
|
||
+ stack_pointer_rtx));
|
||
+
|
||
+ if (!cfun->machine->keep_interrupts_masked_p)
|
||
+ {
|
||
+ /* Move from COP0 Cause to K0. */
|
||
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, K0_REG_NUM),
|
||
+ gen_rtx_REG (SImode,
|
||
+ COP0_CAUSE_REG_NUM)));
|
||
+ /* Move from COP0 EPC to K1. */
|
||
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, K1_REG_NUM),
|
||
+ gen_rtx_REG (SImode,
|
||
+ COP0_EPC_REG_NUM)));
|
||
+ }
|
||
+
|
||
+ /* Allocate the first part of the frame. */
|
||
+ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
|
||
+ GEN_INT (-step1));
|
||
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
|
||
+ size -= step1;
|
||
+
|
||
+ /* Start at the uppermost location for saving. */
|
||
+ offset = frame->cop0_sp_offset - size;
|
||
+ if (!cfun->machine->keep_interrupts_masked_p)
|
||
+ {
|
||
+ /* Push EPC into its stack slot. */
|
||
+ mem = gen_frame_mem (word_mode,
|
||
+ plus_constant (stack_pointer_rtx,
|
||
+ offset));
|
||
+ mips_emit_move (mem, gen_rtx_REG (word_mode, K1_REG_NUM));
|
||
+ offset -= UNITS_PER_WORD;
|
||
+ }
|
||
+
|
||
+ /* Move from COP0 Status to K1. */
|
||
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, K1_REG_NUM),
|
||
+ gen_rtx_REG (SImode,
|
||
+ COP0_STATUS_REG_NUM)));
|
||
+
|
||
+ /* Right justify the RIPL in k0. */
|
||
+ if (!cfun->machine->keep_interrupts_masked_p)
|
||
+ emit_insn (gen_lshrsi3 (gen_rtx_REG (SImode, K0_REG_NUM),
|
||
+ gen_rtx_REG (SImode, K0_REG_NUM),
|
||
+ GEN_INT (CAUSE_IPL)));
|
||
+
|
||
+ /* Push Status into its stack slot. */
|
||
+ mem = gen_frame_mem (word_mode,
|
||
+ plus_constant (stack_pointer_rtx, offset));
|
||
+ mips_emit_move (mem, gen_rtx_REG (word_mode, K1_REG_NUM));
|
||
+ offset -= UNITS_PER_WORD;
|
||
+
|
||
+ /* Insert the RIPL into our copy of SR (k1) as the new IPL. */
|
||
+ if (!cfun->machine->keep_interrupts_masked_p)
|
||
+ emit_insn (gen_insvsi (gen_rtx_REG (SImode, K1_REG_NUM),
|
||
+ GEN_INT (6),
|
||
+ GEN_INT (SR_IPL),
|
||
+ gen_rtx_REG (SImode, K0_REG_NUM)));
|
||
+
|
||
+ if (!cfun->machine->keep_interrupts_masked_p)
|
||
+ /* Enable interrupts by clearing the KSU ERL and EXL bits.
|
||
+ IE is already the correct value, so we don't have to do
|
||
+ anything explicit. */
|
||
+ emit_insn (gen_insvsi (gen_rtx_REG (SImode, K1_REG_NUM),
|
||
+ GEN_INT (4),
|
||
+ GEN_INT (SR_EXL),
|
||
+ gen_rtx_REG (SImode, GP_REG_FIRST)));
|
||
+ else
|
||
+ /* Disable interrupts by clearing the KSU, ERL, EXL,
|
||
+ and IE bits. */
|
||
+ emit_insn (gen_insvsi (gen_rtx_REG (SImode, K1_REG_NUM),
|
||
+ GEN_INT (5),
|
||
+ GEN_INT (SR_IE),
|
||
+ gen_rtx_REG (SImode, GP_REG_FIRST)));
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ insn = gen_add3_insn (stack_pointer_rtx,
|
||
+ stack_pointer_rtx,
|
||
+ GEN_INT (-step1));
|
||
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
|
||
+ size -= step1;
|
||
+ }
|
||
+ mips_for_each_saved_acc (size, mips_save_reg);
|
||
+ mips_for_each_saved_gpr_and_fpr (size, mips_save_reg);
|
||
}
|
||
}
|
||
|
||
@@ -9293,6 +9721,20 @@
|
||
pic_offset_table_rtx);
|
||
}
|
||
|
||
+ /* We need to search back to the last use of K0 or K1. */
|
||
+ if (cfun->machine->interrupt_handler_p)
|
||
+ {
|
||
+ for (insn = get_last_insn (); insn != NULL_RTX; insn = PREV_INSN (insn))
|
||
+ if (INSN_P (insn)
|
||
+ && for_each_rtx (&PATTERN (insn), mips_kernel_reg_p, NULL))
|
||
+ break;
|
||
+ /* Emit a move from K1 to COP0 Status after insn. */
|
||
+ gcc_assert (insn != NULL_RTX);
|
||
+ emit_insn_after (gen_cop0_move (gen_rtx_REG (SImode, COP0_STATUS_REG_NUM),
|
||
+ gen_rtx_REG (SImode, K1_REG_NUM)),
|
||
+ insn);
|
||
+ }
|
||
+
|
||
/* If we are profiling, make sure no instructions are scheduled before
|
||
the call to mcount. */
|
||
if (crtl->profile)
|
||
@@ -9309,7 +9751,20 @@
|
||
if (TARGET_MIPS16 && REGNO (reg) == GP_REG_FIRST + 31)
|
||
reg = gen_rtx_REG (GET_MODE (reg), GP_REG_FIRST + 7);
|
||
|
||
- if (TARGET_MIPS16 && !M16_REG_P (REGNO (reg)))
|
||
+ if (REGNO (reg) == HI_REGNUM)
|
||
+ {
|
||
+ mips_emit_move (MIPS_EPILOGUE_TEMP (GET_MODE (reg)), mem);
|
||
+ if (TARGET_64BIT)
|
||
+ emit_insn (gen_mthisi_di (gen_rtx_REG (TImode, MD_REG_FIRST),
|
||
+ MIPS_EPILOGUE_TEMP (DImode),
|
||
+ gen_rtx_REG (DImode, LO_REGNUM)));
|
||
+ else
|
||
+ emit_insn (gen_mthisi_di (gen_rtx_REG (DImode, MD_REG_FIRST),
|
||
+ MIPS_EPILOGUE_TEMP (SImode),
|
||
+ gen_rtx_REG (SImode, LO_REGNUM)));
|
||
+ }
|
||
+ else if ((TARGET_MIPS16 && !M16_REG_P (REGNO (reg)))
|
||
+ || ACC_REG_P (REGNO (reg)))
|
||
{
|
||
/* Can't restore directly; move through a temporary. */
|
||
mips_emit_move (MIPS_EPILOGUE_TEMP (GET_MODE (reg)), mem);
|
||
@@ -9345,7 +9800,7 @@
|
||
{
|
||
const struct mips_frame_info *frame;
|
||
HOST_WIDE_INT step1, step2;
|
||
- rtx base, target;
|
||
+ rtx base, target, insn;
|
||
|
||
if (!sibcall_p && mips_can_use_return_insn ())
|
||
{
|
||
@@ -9378,7 +9833,8 @@
|
||
|
||
/* If we need to restore registers, deallocate as much stack as
|
||
possible in the second step without going out of range. */
|
||
- if ((frame->mask | frame->fmask) != 0)
|
||
+ if ((frame->mask | frame->fmask | frame->acc_mask) != 0
|
||
+ || frame->num_cop0_regs > 0)
|
||
{
|
||
step2 = MIN (step1, MIPS_MAX_FIRST_STACK_STEP);
|
||
step1 -= step2;
|
||
@@ -9440,13 +9896,53 @@
|
||
else
|
||
{
|
||
/* Restore the registers. */
|
||
- mips_for_each_saved_reg (frame->total_size - step2, mips_restore_reg);
|
||
+ mips_for_each_saved_acc (frame->total_size - step2, mips_restore_reg);
|
||
+ mips_for_each_saved_gpr_and_fpr (frame->total_size - step2,
|
||
+ mips_restore_reg);
|
||
|
||
- /* Deallocate the final bit of the frame. */
|
||
- if (step2 > 0)
|
||
- emit_insn (gen_add3_insn (stack_pointer_rtx,
|
||
- stack_pointer_rtx,
|
||
- GEN_INT (step2)));
|
||
+ if (cfun->machine->interrupt_handler_p)
|
||
+ {
|
||
+ HOST_WIDE_INT offset;
|
||
+ rtx mem;
|
||
+
|
||
+ offset = frame->cop0_sp_offset - (frame->total_size - step2);
|
||
+ if (!cfun->machine->keep_interrupts_masked_p)
|
||
+ {
|
||
+ /* Restore the original EPC. */
|
||
+ mem = gen_frame_mem (word_mode,
|
||
+ plus_constant (stack_pointer_rtx, offset));
|
||
+ mips_emit_move (gen_rtx_REG (word_mode, K0_REG_NUM), mem);
|
||
+ offset -= UNITS_PER_WORD;
|
||
+
|
||
+ /* Move to COP0 EPC. */
|
||
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, COP0_EPC_REG_NUM),
|
||
+ gen_rtx_REG (SImode, K0_REG_NUM)));
|
||
+ }
|
||
+
|
||
+ /* Restore the original Status. */
|
||
+ mem = gen_frame_mem (word_mode,
|
||
+ plus_constant (stack_pointer_rtx, offset));
|
||
+ mips_emit_move (gen_rtx_REG (word_mode, K0_REG_NUM), mem);
|
||
+ offset -= UNITS_PER_WORD;
|
||
+
|
||
+ /* If we don't use a shadow register set, we need to update SP. */
|
||
+ if (!cfun->machine->use_shadow_register_set_p && step2 > 0)
|
||
+ emit_insn (gen_add3_insn (stack_pointer_rtx,
|
||
+ stack_pointer_rtx,
|
||
+ GEN_INT (step2)));
|
||
+
|
||
+ /* Move to COP0 Status. */
|
||
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, COP0_STATUS_REG_NUM),
|
||
+ gen_rtx_REG (SImode, K0_REG_NUM)));
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* Deallocate the final bit of the frame. */
|
||
+ if (step2 > 0)
|
||
+ emit_insn (gen_add3_insn (stack_pointer_rtx,
|
||
+ stack_pointer_rtx,
|
||
+ GEN_INT (step2)));
|
||
+ }
|
||
}
|
||
|
||
/* Add in the __builtin_eh_return stack adjustment. We need to
|
||
@@ -9469,18 +9965,44 @@
|
||
|
||
if (!sibcall_p)
|
||
{
|
||
- unsigned int regno;
|
||
-
|
||
- /* When generating MIPS16 code, the normal mips_for_each_saved_reg
|
||
- path will restore the return address into $7 rather than $31. */
|
||
- if (TARGET_MIPS16
|
||
- && !GENERATE_MIPS16E_SAVE_RESTORE
|
||
- && BITSET_P (frame->mask, 31))
|
||
- regno = GP_REG_FIRST + 7;
|
||
- else
|
||
- regno = GP_REG_FIRST + 31;
|
||
mips_expand_before_return ();
|
||
- emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, regno)));
|
||
+ if (cfun->machine->interrupt_handler_p)
|
||
+ {
|
||
+ /* Interrupt handlers generate eret or deret. */
|
||
+ if (cfun->machine->use_debug_exception_return_p)
|
||
+ emit_jump_insn (gen_mips_deret ());
|
||
+ else
|
||
+ emit_jump_insn (gen_mips_eret ());
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ unsigned int regno;
|
||
+
|
||
+ /* When generating MIPS16 code, the normal
|
||
+ mips_for_each_saved_gpr_and_fpr path will restore the return
|
||
+ address into $7 rather than $31. */
|
||
+ if (TARGET_MIPS16
|
||
+ && !GENERATE_MIPS16E_SAVE_RESTORE
|
||
+ && BITSET_P (frame->mask, 31))
|
||
+ regno = GP_REG_FIRST + 7;
|
||
+ else
|
||
+ regno = GP_REG_FIRST + 31;
|
||
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, regno)));
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Search from the beginning to the first use of K0 or K1. */
|
||
+ if (cfun->machine->interrupt_handler_p
|
||
+ && !cfun->machine->keep_interrupts_masked_p)
|
||
+ {
|
||
+ for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
|
||
+ if (INSN_P (insn)
|
||
+ && for_each_rtx (&PATTERN(insn), mips_kernel_reg_p, NULL))
|
||
+ break;
|
||
+ gcc_assert (insn != NULL_RTX);
|
||
+ /* Insert disable interrupts before the first use of K0 or K1. */
|
||
+ emit_insn_before (gen_mips_di (), insn);
|
||
+ emit_insn_before (gen_mips_ehb (), insn);
|
||
}
|
||
}
|
||
|
||
@@ -9491,6 +10013,10 @@
|
||
bool
|
||
mips_can_use_return_insn (void)
|
||
{
|
||
+ /* Interrupt handlers need to go through the epilogue. */
|
||
+ if (cfun->machine->interrupt_handler_p)
|
||
+ return false;
|
||
+
|
||
if (!reload_completed)
|
||
return false;
|
||
|
||
@@ -10422,10 +10948,15 @@
|
||
s = "bnez\t%2,1f\n\tbreak\t7\n1:";
|
||
}
|
||
else if (GENERATE_DIVIDE_TRAPS)
|
||
- {
|
||
- output_asm_insn (s, operands);
|
||
- s = "teq\t%2,%.,7";
|
||
- }
|
||
+ {
|
||
+ if (TUNE_74K)
|
||
+ output_asm_insn ("teq\t%2,%.,7", operands);
|
||
+ else
|
||
+ {
|
||
+ output_asm_insn (s, operands);
|
||
+ s = "teq\t%2,%.,7";
|
||
+ }
|
||
+ }
|
||
else
|
||
{
|
||
output_asm_insn ("%(bne\t%2,%.,1f", operands);
|
||
@@ -10737,7 +11268,17 @@
|
||
ready[pos2] = temp;
|
||
}
|
||
}
|
||
-
|
||
+
|
||
+int
|
||
+mips_mult_madd_chain_bypass_p (rtx out_insn ATTRIBUTE_UNUSED,
|
||
+ rtx in_insn ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ if (reload_completed)
|
||
+ return false;
|
||
+ else
|
||
+ return true;
|
||
+}
|
||
+
|
||
/* Used by TUNE_MACC_CHAINS to record the last scheduled instruction
|
||
that may clobber hi or lo. */
|
||
static rtx mips_macc_chains_last_hilo;
|
||
@@ -13910,6 +14451,14 @@
|
||
long as any indirect jumps use $25. */
|
||
flag_pic = 1;
|
||
|
||
+ /* For SDE, switch on ABICALLS mode if -fpic or -fpie were used, and the
|
||
+ user hasn't explicitly disabled these modes. */
|
||
+ if (TARGET_MIPS_SDE
|
||
+ && (flag_pic || flag_pie) && !TARGET_ABICALLS
|
||
+ && !((target_flags_explicit & MASK_ABICALLS))
|
||
+ && mips_abi != ABI_EABI)
|
||
+ target_flags |= MASK_ABICALLS;
|
||
+
|
||
/* -mvr4130-align is a "speed over size" optimization: it usually produces
|
||
faster code, but at the expense of more nops. Enable it at -O3 and
|
||
above. */
|
||
@@ -13984,26 +14533,46 @@
|
||
if (TARGET_DSPR2)
|
||
target_flags |= MASK_DSP;
|
||
|
||
- /* .eh_frame addresses should be the same width as a C pointer.
|
||
- Most MIPS ABIs support only one pointer size, so the assembler
|
||
- will usually know exactly how big an .eh_frame address is.
|
||
-
|
||
- Unfortunately, this is not true of the 64-bit EABI. The ABI was
|
||
- originally defined to use 64-bit pointers (i.e. it is LP64), and
|
||
- this is still the default mode. However, we also support an n32-like
|
||
- ILP32 mode, which is selected by -mlong32. The problem is that the
|
||
- assembler has traditionally not had an -mlong option, so it has
|
||
- traditionally not known whether we're using the ILP32 or LP64 form.
|
||
-
|
||
- As it happens, gas versions up to and including 2.19 use _32-bit_
|
||
- addresses for EABI64 .cfi_* directives. This is wrong for the
|
||
- default LP64 mode, so we can't use the directives by default.
|
||
- Moreover, since gas's current behavior is at odds with gcc's
|
||
- default behavior, it seems unwise to rely on future versions
|
||
- of gas behaving the same way. We therefore avoid using .cfi
|
||
- directives for -mlong32 as well. */
|
||
- if (mips_abi == ABI_EABI && TARGET_64BIT)
|
||
- flag_dwarf2_cfi_asm = 0;
|
||
+ /* Use the traditional method of generating .eh_frames.
|
||
+ We need this for two reasons:
|
||
+
|
||
+ - .eh_frame addresses should be the same width as a C pointer.
|
||
+ Most MIPS ABIs support only one pointer size, so the assembler
|
||
+ will usually know exactly how big an .eh_frame address is.
|
||
+
|
||
+ Unfortunately, this is not true of the 64-bit EABI. The ABI was
|
||
+ originally defined to use 64-bit pointers (i.e. it is LP64), and
|
||
+ this is still the default mode. However, we also support an n32-like
|
||
+ ILP32 mode, which is selected by -mlong32. The problem is that the
|
||
+ assembler has traditionally not had an -mlong option, so it has
|
||
+ traditionally not known whether we're using the ILP32 or LP64 form.
|
||
+
|
||
+ As it happens, gas versions up to and including 2.19 use _32-bit_
|
||
+ addresses for EABI64 .cfi_* directives. This is wrong for the
|
||
+ default LP64 mode, so we can't use the directives by default.
|
||
+ Moreover, since gas's current behavior is at odds with gcc's
|
||
+ default behavior, it seems unwise to rely on future versions
|
||
+ of gas behaving the same way. We therefore avoid using .cfi
|
||
+ directives for -mlong32 as well.
|
||
+
|
||
+ - .cfi* directives generate read-only .eh_frame sections.
|
||
+ However, MIPS has traditionally not allowed directives like:
|
||
+
|
||
+ .long x-.
|
||
+
|
||
+ in cases where "x" is in a different section, or is not defined
|
||
+ in the same assembly file. We have therefore traditionally
|
||
+ used absolute addresses and a writable .eh_frame instead.
|
||
+
|
||
+ The linker is able to convert most of these absolute addresses
|
||
+ into PC-relative form where doing so is necessary to avoid
|
||
+ relocations. However, until 2.21, it wasn't able to do this
|
||
+ for indirect encodings or personality routines.
|
||
+
|
||
+ GNU ld 2.21 and GCC 4.5 have support for read-only .eh_frames,
|
||
+ but for the time being, we should stick to the approach used
|
||
+ in 4.3 and earlier. */
|
||
+ flag_dwarf2_cfi_asm = 0;
|
||
|
||
mips_init_print_operand_punct ();
|
||
|
||
@@ -14242,6 +14811,178 @@
|
||
reg_alloc_order[24] = 0;
|
||
}
|
||
}
|
||
+
|
||
+/* Implement EPILOGUE_USES. */
|
||
+
|
||
+bool
|
||
+mips_epilogue_uses (unsigned int regno)
|
||
+{
|
||
+ /* Say that the epilogue uses the return address register. Note that
|
||
+ in the case of sibcalls, the values "used by the epilogue" are
|
||
+ considered live at the start of the called function. */
|
||
+ if (regno == 31)
|
||
+ return true;
|
||
+
|
||
+ /* If using a GOT, say that the epilogue also uses GOT_VERSION_REGNUM.
|
||
+ See the comment above load_call<mode> for details. */
|
||
+ if (TARGET_USE_GOT && (regno) == GOT_VERSION_REGNUM)
|
||
+ return true;
|
||
+
|
||
+ /* An interrupt handler must preserve some registers that are
|
||
+ ordinarily call-clobbered. */
|
||
+ if (cfun->machine->interrupt_handler_p
|
||
+ && mips_interrupt_extra_call_saved_reg_p (regno))
|
||
+ return true;
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
+#ifdef CVMX_SHARED_BSS_FLAGS
|
||
+/* Handle a "cvmx_shared" attribute; arguments as in
|
||
+ struct attribute_spec.handler. */
|
||
+
|
||
+static tree
|
||
+octeon_handle_cvmx_shared_attribute (tree *node, tree name,
|
||
+ tree args ATTRIBUTE_UNUSED,
|
||
+ int flags ATTRIBUTE_UNUSED,
|
||
+ bool *no_add_attrs)
|
||
+{
|
||
+ if (TREE_CODE (*node) != VAR_DECL)
|
||
+ {
|
||
+ warning (OPT_Wattributes, "%qs attribute only applies to variables",
|
||
+ IDENTIFIER_POINTER (name));
|
||
+ *no_add_attrs = true;
|
||
+ }
|
||
+
|
||
+ return NULL_TREE;
|
||
+}
|
||
+
|
||
+/* Switch to the appropriate section for output of DECL.
|
||
+ DECL is either a `VAR_DECL' node or a constant of some sort.
|
||
+ RELOC indicates whether forming the initial value of DECL requires
|
||
+ link-time relocations. */
|
||
+
|
||
+static section *
|
||
+octeon_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
|
||
+{
|
||
+ if (decl && TREE_CODE (decl) == VAR_DECL
|
||
+ && lookup_attribute ("cvmx_shared", DECL_ATTRIBUTES (decl)))
|
||
+ {
|
||
+ const char *sname = NULL;
|
||
+ unsigned int flags = SECTION_WRITE;
|
||
+
|
||
+ switch (categorize_decl_for_section (decl, reloc))
|
||
+ {
|
||
+ case SECCAT_DATA:
|
||
+ case SECCAT_SDATA:
|
||
+ case SECCAT_RODATA:
|
||
+ case SECCAT_SRODATA:
|
||
+ case SECCAT_RODATA_MERGE_STR:
|
||
+ case SECCAT_RODATA_MERGE_STR_INIT:
|
||
+ case SECCAT_RODATA_MERGE_CONST:
|
||
+ case SECCAT_DATA_REL:
|
||
+ case SECCAT_DATA_REL_LOCAL:
|
||
+ case SECCAT_DATA_REL_RO:
|
||
+ case SECCAT_DATA_REL_RO_LOCAL:
|
||
+ sname = ".cvmx_shared";
|
||
+ break;
|
||
+ case SECCAT_BSS:
|
||
+ case SECCAT_SBSS:
|
||
+ sname = ".cvmx_shared_bss";
|
||
+ flags |= SECTION_BSS;
|
||
+ break;
|
||
+ case SECCAT_TEXT:
|
||
+ case SECCAT_TDATA:
|
||
+ case SECCAT_TBSS:
|
||
+ break;
|
||
+ }
|
||
+ if (sname)
|
||
+ {
|
||
+ return get_section (sname, flags, decl);
|
||
+ }
|
||
+ }
|
||
+ return default_elf_select_section (decl, reloc, align);
|
||
+}
|
||
+
|
||
+/* Build up a unique section name, expressed as a
|
||
+ STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
|
||
+ RELOC indicates whether the initial value of EXP requires
|
||
+ link-time relocations. */
|
||
+
|
||
+static void
|
||
+octeon_unique_section (tree decl, int reloc)
|
||
+{
|
||
+ if (decl && TREE_CODE (decl) == VAR_DECL
|
||
+ && lookup_attribute ("cvmx_shared", DECL_ATTRIBUTES (decl)))
|
||
+ {
|
||
+ const char *sname = NULL;
|
||
+
|
||
+ if (! DECL_ONE_ONLY (decl))
|
||
+ {
|
||
+ section *sect;
|
||
+ sect = octeon_select_section (decl, reloc, DECL_ALIGN (decl));
|
||
+ DECL_SECTION_NAME (decl) = build_string (strlen (sect->named.name),
|
||
+ sect->named.name);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ switch (categorize_decl_for_section (decl, reloc))
|
||
+ {
|
||
+ case SECCAT_BSS:
|
||
+ case SECCAT_SBSS:
|
||
+ sname = ".cvmx_shared_bss.linkonce.";
|
||
+ break;
|
||
+ case SECCAT_SDATA:
|
||
+ case SECCAT_DATA:
|
||
+ case SECCAT_DATA_REL:
|
||
+ case SECCAT_DATA_REL_LOCAL:
|
||
+ case SECCAT_DATA_REL_RO:
|
||
+ case SECCAT_DATA_REL_RO_LOCAL:
|
||
+ case SECCAT_RODATA:
|
||
+ case SECCAT_SRODATA:
|
||
+ case SECCAT_RODATA_MERGE_STR:
|
||
+ case SECCAT_RODATA_MERGE_STR_INIT:
|
||
+ case SECCAT_RODATA_MERGE_CONST:
|
||
+ sname = ".cvmx_shared.linkonce.";
|
||
+ break;
|
||
+ case SECCAT_TEXT:
|
||
+ case SECCAT_TDATA:
|
||
+ case SECCAT_TBSS:
|
||
+ break;
|
||
+ }
|
||
+ if (sname)
|
||
+ {
|
||
+ const char *name;
|
||
+ size_t plen, nlen;
|
||
+ char *string;
|
||
+ plen = strlen (sname);
|
||
+
|
||
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
|
||
+ name = targetm.strip_name_encoding (name);
|
||
+ nlen = strlen (name);
|
||
+
|
||
+ string = alloca (plen + nlen + 1);
|
||
+ memcpy (string, sname, plen);
|
||
+ memcpy (string + plen, name, nlen + 1);
|
||
+ DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
|
||
+ return;
|
||
+ }
|
||
+ }
|
||
+ default_unique_section (decl, reloc);
|
||
+}
|
||
+
|
||
+/* Emit an uninitialized cvmx_shared variable. */
|
||
+void
|
||
+octeon_output_shared_variable (FILE *stream, tree decl, const char *name,
|
||
+ unsigned HOST_WIDE_INT size, int align)
|
||
+{
|
||
+ switch_to_section (get_section (".cvmx_shared_bss", CVMX_SHARED_BSS_FLAGS,
|
||
+ NULL_TREE));
|
||
+ ASM_OUTPUT_ALIGN (stream, floor_log2 (align / BITS_PER_UNIT));
|
||
+ ASM_DECLARE_OBJECT_NAME (stream, name, decl);
|
||
+ ASM_OUTPUT_SKIP (stream, size != 0 ? size : 1);
|
||
+}
|
||
+#endif
|
||
|
||
/* Initialize the GCC target structure. */
|
||
#undef TARGET_ASM_ALIGNED_HI_OP
|
||
--- a/gcc/config/mips/mips-dsp.md
|
||
+++ b/gcc/config/mips/mips-dsp.md
|
||
@@ -42,9 +42,9 @@
|
||
(match_operand:DSPV 2 "register_operand" "d")))
|
||
(set (reg:CCDSP CCDSP_OU_REGNUM)
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDQ))])]
|
||
- ""
|
||
+ "ISA_HAS_DSP"
|
||
"add<DSPV:dspfmt1>.<DSPV:dspfmt2>\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_add<DSP:dspfmt1>_s_<DSP:dspfmt2>"
|
||
@@ -55,9 +55,9 @@
|
||
UNSPEC_ADDQ_S))
|
||
(set (reg:CCDSP CCDSP_OU_REGNUM)
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDQ_S))])]
|
||
- ""
|
||
+ "ISA_HAS_DSP"
|
||
"add<DSP:dspfmt1>_s.<DSP:dspfmt2>\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; SUBQ*
|
||
@@ -70,7 +70,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBQ))])]
|
||
"ISA_HAS_DSP"
|
||
"sub<DSPV:dspfmt1>.<DSPV:dspfmt2>\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_sub<DSP:dspfmt1>_s_<DSP:dspfmt2>"
|
||
@@ -83,7 +83,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBQ_S))])]
|
||
"ISA_HAS_DSP"
|
||
"sub<DSP:dspfmt1>_s.<DSP:dspfmt2>\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; ADDSC
|
||
@@ -97,7 +97,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDSC))])]
|
||
"ISA_HAS_DSP"
|
||
"addsc\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; ADDWC
|
||
@@ -112,7 +112,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDWC))])]
|
||
"ISA_HAS_DSP"
|
||
"addwc\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; MODSUB
|
||
@@ -123,7 +123,7 @@
|
||
UNSPEC_MODSUB))]
|
||
"ISA_HAS_DSP"
|
||
"modsub\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; RADDU*
|
||
@@ -133,7 +133,7 @@
|
||
UNSPEC_RADDU_W_QB))]
|
||
"ISA_HAS_DSP"
|
||
"raddu.w.qb\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; ABSQ*
|
||
@@ -146,7 +146,7 @@
|
||
(unspec:CCDSP [(match_dup 1)] UNSPEC_ABSQ_S))])]
|
||
"ISA_HAS_DSP"
|
||
"absq_s.<DSPQ:dspfmt2>\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; PRECRQ*
|
||
@@ -157,7 +157,7 @@
|
||
UNSPEC_PRECRQ_QB_PH))]
|
||
"ISA_HAS_DSP"
|
||
"precrq.qb.ph\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_precrq_ph_w"
|
||
@@ -167,7 +167,7 @@
|
||
UNSPEC_PRECRQ_PH_W))]
|
||
"ISA_HAS_DSP"
|
||
"precrq.ph.w\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_precrq_rs_ph_w"
|
||
@@ -181,7 +181,7 @@
|
||
UNSPEC_PRECRQ_RS_PH_W))])]
|
||
"ISA_HAS_DSP"
|
||
"precrq_rs.ph.w\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; PRECRQU*
|
||
@@ -196,7 +196,7 @@
|
||
UNSPEC_PRECRQU_S_QB_PH))])]
|
||
"ISA_HAS_DSP"
|
||
"precrqu_s.qb.ph\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; PRECEQ*
|
||
@@ -206,7 +206,7 @@
|
||
UNSPEC_PRECEQ_W_PHL))]
|
||
"ISA_HAS_DSP"
|
||
"preceq.w.phl\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_preceq_w_phr"
|
||
@@ -215,7 +215,7 @@
|
||
UNSPEC_PRECEQ_W_PHR))]
|
||
"ISA_HAS_DSP"
|
||
"preceq.w.phr\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; PRECEQU*
|
||
@@ -225,7 +225,7 @@
|
||
UNSPEC_PRECEQU_PH_QBL))]
|
||
"ISA_HAS_DSP"
|
||
"precequ.ph.qbl\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_precequ_ph_qbr"
|
||
@@ -234,7 +234,7 @@
|
||
UNSPEC_PRECEQU_PH_QBR))]
|
||
"ISA_HAS_DSP"
|
||
"precequ.ph.qbr\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_precequ_ph_qbla"
|
||
@@ -243,7 +243,7 @@
|
||
UNSPEC_PRECEQU_PH_QBLA))]
|
||
"ISA_HAS_DSP"
|
||
"precequ.ph.qbla\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_precequ_ph_qbra"
|
||
@@ -252,7 +252,7 @@
|
||
UNSPEC_PRECEQU_PH_QBRA))]
|
||
"ISA_HAS_DSP"
|
||
"precequ.ph.qbra\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; PRECEU*
|
||
@@ -262,7 +262,7 @@
|
||
UNSPEC_PRECEU_PH_QBL))]
|
||
"ISA_HAS_DSP"
|
||
"preceu.ph.qbl\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_preceu_ph_qbr"
|
||
@@ -271,7 +271,7 @@
|
||
UNSPEC_PRECEU_PH_QBR))]
|
||
"ISA_HAS_DSP"
|
||
"preceu.ph.qbr\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_preceu_ph_qbla"
|
||
@@ -280,7 +280,7 @@
|
||
UNSPEC_PRECEU_PH_QBLA))]
|
||
"ISA_HAS_DSP"
|
||
"preceu.ph.qbla\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_preceu_ph_qbra"
|
||
@@ -289,7 +289,7 @@
|
||
UNSPEC_PRECEU_PH_QBRA))]
|
||
"ISA_HAS_DSP"
|
||
"preceu.ph.qbra\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; Table 2-2. MIPS DSP ASE Instructions: Shift
|
||
@@ -313,7 +313,7 @@
|
||
}
|
||
return "shllv.<DSPV:dspfmt2>\t%0,%1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_shll_s_<DSPQ:dspfmt2>"
|
||
@@ -335,7 +335,7 @@
|
||
}
|
||
return "shllv_s.<DSPQ:dspfmt2>\t%0,%1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; SHRL*
|
||
@@ -354,7 +354,7 @@
|
||
}
|
||
return "shrlv.qb\t%0,%1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; SHRA*
|
||
@@ -373,7 +373,7 @@
|
||
}
|
||
return "shrav.ph\t%0,%1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_shra_r_<DSPQ:dspfmt2>"
|
||
@@ -392,7 +392,7 @@
|
||
}
|
||
return "shrav_r.<DSPQ:dspfmt2>\t%0,%1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; Table 2-3. MIPS DSP ASE Instructions: Multiply
|
||
@@ -478,7 +478,7 @@
|
||
UNSPEC_DPAU_H_QBL))]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpau.h.qbl\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpau_h_qbr"
|
||
@@ -489,7 +489,7 @@
|
||
UNSPEC_DPAU_H_QBR))]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpau.h.qbr\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; DPSU*
|
||
@@ -501,7 +501,7 @@
|
||
UNSPEC_DPSU_H_QBL))]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpsu.h.qbl\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpsu_h_qbr"
|
||
@@ -512,7 +512,7 @@
|
||
UNSPEC_DPSU_H_QBR))]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpsu.h.qbr\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; DPAQ*
|
||
@@ -528,7 +528,7 @@
|
||
UNSPEC_DPAQ_S_W_PH))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpaq_s.w.ph\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; DPSQ*
|
||
@@ -544,7 +544,7 @@
|
||
UNSPEC_DPSQ_S_W_PH))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpsq_s.w.ph\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; MULSAQ*
|
||
@@ -560,7 +560,7 @@
|
||
UNSPEC_MULSAQ_S_W_PH))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"mulsaq_s.w.ph\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; DPAQ*
|
||
@@ -576,7 +576,7 @@
|
||
UNSPEC_DPAQ_SA_L_W))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpaq_sa.l.w\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmacsat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; DPSQ*
|
||
@@ -592,7 +592,7 @@
|
||
UNSPEC_DPSQ_SA_L_W))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"dpsq_sa.l.w\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmacsat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; MAQ*
|
||
@@ -608,7 +608,7 @@
|
||
UNSPEC_MAQ_S_W_PHL))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"maq_s.w.phl\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_maq_s_w_phr"
|
||
@@ -623,7 +623,7 @@
|
||
UNSPEC_MAQ_S_W_PHR))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"maq_s.w.phr\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; MAQ_SA*
|
||
@@ -639,7 +639,7 @@
|
||
UNSPEC_MAQ_SA_W_PHL))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"maq_sa.w.phl\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmacsat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_maq_sa_w_phr"
|
||
@@ -654,7 +654,7 @@
|
||
UNSPEC_MAQ_SA_W_PHR))])]
|
||
"ISA_HAS_DSP && !TARGET_64BIT"
|
||
"maq_sa.w.phr\t%q0,%2,%3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmacsat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; Table 2-4. MIPS DSP ASE Instructions: General Bit/Manipulation
|
||
@@ -665,7 +665,7 @@
|
||
UNSPEC_BITREV))]
|
||
"ISA_HAS_DSP"
|
||
"bitrev\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; INSV
|
||
@@ -678,7 +678,7 @@
|
||
UNSPEC_INSV))]
|
||
"ISA_HAS_DSP"
|
||
"insv\t%0,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; REPL*
|
||
@@ -696,7 +696,7 @@
|
||
}
|
||
return "replv.qb\t%0,%1";
|
||
}
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_repl_ph"
|
||
@@ -707,7 +707,7 @@
|
||
"@
|
||
repl.ph\t%0,%1
|
||
replv.ph\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; Table 2-5. MIPS DSP ASE Instructions: Compare-Pick
|
||
@@ -720,7 +720,7 @@
|
||
UNSPEC_CMP_EQ))]
|
||
"ISA_HAS_DSP"
|
||
"cmp<DSPV:dspfmt1_1>.eq.<DSPV:dspfmt2>\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmp<DSPV:dspfmt1_1>_lt_<DSPV:dspfmt2>"
|
||
@@ -731,7 +731,7 @@
|
||
UNSPEC_CMP_LT))]
|
||
"ISA_HAS_DSP"
|
||
"cmp<DSPV:dspfmt1_1>.lt.<DSPV:dspfmt2>\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmp<DSPV:dspfmt1_1>_le_<DSPV:dspfmt2>"
|
||
@@ -742,7 +742,7 @@
|
||
UNSPEC_CMP_LE))]
|
||
"ISA_HAS_DSP"
|
||
"cmp<DSPV:dspfmt1_1>.le.<DSPV:dspfmt2>\t%0,%1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmpgu_eq_qb"
|
||
@@ -752,7 +752,7 @@
|
||
UNSPEC_CMPGU_EQ_QB))]
|
||
"ISA_HAS_DSP"
|
||
"cmpgu.eq.qb\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmpgu_lt_qb"
|
||
@@ -762,7 +762,7 @@
|
||
UNSPEC_CMPGU_LT_QB))]
|
||
"ISA_HAS_DSP"
|
||
"cmpgu.lt.qb\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmpgu_le_qb"
|
||
@@ -772,7 +772,7 @@
|
||
UNSPEC_CMPGU_LE_QB))]
|
||
"ISA_HAS_DSP"
|
||
"cmpgu.le.qb\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; PICK*
|
||
@@ -784,7 +784,7 @@
|
||
UNSPEC_PICK))]
|
||
"ISA_HAS_DSP"
|
||
"pick.<DSPV:dspfmt2>\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; PACKRL*
|
||
@@ -795,7 +795,7 @@
|
||
UNSPEC_PACKRL_PH))]
|
||
"ISA_HAS_DSP"
|
||
"packrl.ph\t%0,%1,%2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; Table 2-6. MIPS DSP ASE Instructions: Accumulator and DSPControl Access
|
||
@@ -818,7 +818,7 @@
|
||
}
|
||
return "extrv.w\t%0,%q1,%2";
|
||
}
|
||
- [(set_attr "type" "mfhilo")
|
||
+ [(set_attr "type" "accext")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_extr_r_w"
|
||
@@ -839,7 +839,7 @@
|
||
}
|
||
return "extrv_r.w\t%0,%q1,%2";
|
||
}
|
||
- [(set_attr "type" "mfhilo")
|
||
+ [(set_attr "type" "accext")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_extr_rs_w"
|
||
@@ -860,7 +860,7 @@
|
||
}
|
||
return "extrv_rs.w\t%0,%q1,%2";
|
||
}
|
||
- [(set_attr "type" "mfhilo")
|
||
+ [(set_attr "type" "accext")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; EXTR*_S.H
|
||
@@ -882,7 +882,7 @@
|
||
}
|
||
return "extrv_s.h\t%0,%q1,%2";
|
||
}
|
||
- [(set_attr "type" "mfhilo")
|
||
+ [(set_attr "type" "accext")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; EXTP*
|
||
@@ -905,7 +905,7 @@
|
||
}
|
||
return "extpv\t%0,%q1,%2";
|
||
}
|
||
- [(set_attr "type" "mfhilo")
|
||
+ [(set_attr "type" "accext")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_extpdp"
|
||
@@ -930,7 +930,7 @@
|
||
}
|
||
return "extpdpv\t%0,%q1,%2";
|
||
}
|
||
- [(set_attr "type" "mfhilo")
|
||
+ [(set_attr "type" "accext")
|
||
(set_attr "mode" "SI")])
|
||
|
||
;; SHILO*
|
||
@@ -949,7 +949,7 @@
|
||
}
|
||
return "shilov\t%q0,%2";
|
||
}
|
||
- [(set_attr "type" "mfhilo")
|
||
+ [(set_attr "type" "accmod")
(set_attr "mode" "SI")])
;; MTHLIP*
@@ -965,7 +965,7 @@
(reg:CCDSP CCDSP_PO_REGNUM)] UNSPEC_MTHLIP))])]
"ISA_HAS_DSP && !TARGET_64BIT"
"mthlip\t%2,%q0"
- [(set_attr "type" "mfhilo")
+ [(set_attr "type" "accmod")
(set_attr "mode" "SI")])
;; WRDSP
@@ -987,7 +987,7 @@
(unspec:CCDSP [(match_dup 0) (match_dup 1)] UNSPEC_WRDSP))])]
"ISA_HAS_DSP"
"wrdsp\t%0,%1"
- [(set_attr "type" "arith")
+ [(set_attr "type" "dspalu")
(set_attr "mode" "SI")])
;; RDDSP
@@ -1003,7 +1003,7 @@
UNSPEC_RDDSP))]
"ISA_HAS_DSP"
"rddsp\t%0,%1"
- [(set_attr "type" "arith")
+ [(set_attr "type" "dspalu")
(set_attr "mode" "SI")])
;; Table 2-7. MIPS DSP ASE Instructions: Indexed-Load
--- a/gcc/config/mips/mips-dspr2.md
|
||
+++ b/gcc/config/mips/mips-dspr2.md
|
||
@@ -9,7 +9,7 @@
|
||
(unspec:CCDSP [(match_dup 1)] UNSPEC_ABSQ_S_QB))])]
|
||
"ISA_HAS_DSPR2"
|
||
"absq_s.qb\t%0,%z1"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_addu_ph"
|
||
@@ -21,7 +21,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDU_PH))])]
|
||
"ISA_HAS_DSPR2"
|
||
"addu.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_addu_s_ph"
|
||
@@ -34,7 +34,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDU_S_PH))])]
|
||
"ISA_HAS_DSPR2"
|
||
"addu_s.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_adduh_qb"
|
||
@@ -44,7 +44,7 @@
|
||
UNSPEC_ADDUH_QB))]
|
||
"ISA_HAS_DSPR2"
|
||
"adduh.qb\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_adduh_r_qb"
|
||
@@ -54,7 +54,7 @@
|
||
UNSPEC_ADDUH_R_QB))]
|
||
"ISA_HAS_DSPR2"
|
||
"adduh_r.qb\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_append"
|
||
@@ -69,7 +69,7 @@
|
||
operands[2] = GEN_INT (INTVAL (operands[2]) & 31);
|
||
return "append\t%0,%z2,%3";
|
||
}
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_balign"
|
||
@@ -84,7 +84,7 @@
|
||
operands[2] = GEN_INT (INTVAL (operands[2]) & 3);
|
||
return "balign\t%0,%z2,%3";
|
||
}
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmpgdu_eq_qb"
|
||
@@ -99,7 +99,7 @@
|
||
UNSPEC_CMPGDU_EQ_QB))])]
|
||
"ISA_HAS_DSPR2"
|
||
"cmpgdu.eq.qb\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmpgdu_lt_qb"
|
||
@@ -114,7 +114,7 @@
|
||
UNSPEC_CMPGDU_LT_QB))])]
|
||
"ISA_HAS_DSPR2"
|
||
"cmpgdu.lt.qb\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_cmpgdu_le_qb"
|
||
@@ -129,7 +129,7 @@
|
||
UNSPEC_CMPGDU_LE_QB))])]
|
||
"ISA_HAS_DSPR2"
|
||
"cmpgdu.le.qb\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpa_w_ph"
|
||
@@ -140,7 +140,7 @@
|
||
UNSPEC_DPA_W_PH))]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dpa.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dps_w_ph"
|
||
@@ -151,7 +151,7 @@
|
||
UNSPEC_DPS_W_PH))]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dps.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_expand "mips_madd<u>"
|
||
@@ -247,7 +247,7 @@
|
||
UNSPEC_MULSA_W_PH))]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"mulsa.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_mult"
|
||
@@ -277,7 +277,7 @@
|
||
UNSPEC_PRECR_QB_PH))]
|
||
"ISA_HAS_DSPR2"
|
||
"precr.qb.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_precr_sra_ph_w"
|
||
@@ -292,7 +292,7 @@
|
||
operands[2] = GEN_INT (INTVAL (operands[2]) & 31);
|
||
return "precr_sra.ph.w\t%0,%z2,%3";
|
||
}
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_precr_sra_r_ph_w"
|
||
@@ -307,7 +307,7 @@
|
||
operands[2] = GEN_INT (INTVAL (operands[2]) & 31);
|
||
return "precr_sra_r.ph.w\t%0,%z2,%3";
|
||
}
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_prepend"
|
||
@@ -322,7 +322,7 @@
|
||
operands[2] = GEN_INT (INTVAL (operands[2]) & 31);
|
||
return "prepend\t%0,%z2,%3";
|
||
}
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_shra_qb"
|
||
@@ -340,7 +340,7 @@
|
||
}
|
||
return "shrav.qb\t%0,%z1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
|
||
@@ -359,7 +359,7 @@
|
||
}
|
||
return "shrav_r.qb\t%0,%z1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_shrl_ph"
|
||
@@ -377,7 +377,7 @@
|
||
}
|
||
return "shrlv.ph\t%0,%z1,%2";
|
||
}
|
||
- [(set_attr "type" "shift")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subu_ph"
|
||
@@ -390,7 +390,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBU_PH))])]
|
||
"ISA_HAS_DSPR2"
|
||
"subu.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subu_s_ph"
|
||
@@ -403,7 +403,7 @@
|
||
(unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBU_S_PH))])]
|
||
"ISA_HAS_DSPR2"
|
||
"subu_s.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalusat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subuh_qb"
|
||
@@ -413,7 +413,7 @@
|
||
UNSPEC_SUBUH_QB))]
|
||
"ISA_HAS_DSPR2"
|
||
"subuh.qb\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subuh_r_qb"
|
||
@@ -423,7 +423,7 @@
|
||
UNSPEC_SUBUH_R_QB))]
|
||
"ISA_HAS_DSPR2"
|
||
"subuh_r.qb\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_addqh_ph"
|
||
@@ -433,7 +433,7 @@
|
||
UNSPEC_ADDQH_PH))]
|
||
"ISA_HAS_DSPR2"
|
||
"addqh.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_addqh_r_ph"
|
||
@@ -443,7 +443,7 @@
|
||
UNSPEC_ADDQH_R_PH))]
|
||
"ISA_HAS_DSPR2"
|
||
"addqh_r.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_addqh_w"
|
||
@@ -453,7 +453,7 @@
|
||
UNSPEC_ADDQH_W))]
|
||
"ISA_HAS_DSPR2"
|
||
"addqh.w\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_addqh_r_w"
|
||
@@ -463,7 +463,7 @@
|
||
UNSPEC_ADDQH_R_W))]
|
||
"ISA_HAS_DSPR2"
|
||
"addqh_r.w\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subqh_ph"
|
||
@@ -473,7 +473,7 @@
|
||
UNSPEC_SUBQH_PH))]
|
||
"ISA_HAS_DSPR2"
|
||
"subqh.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subqh_r_ph"
|
||
@@ -483,7 +483,7 @@
|
||
UNSPEC_SUBQH_R_PH))]
|
||
"ISA_HAS_DSPR2"
|
||
"subqh_r.ph\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subqh_w"
|
||
@@ -493,7 +493,7 @@
|
||
UNSPEC_SUBQH_W))]
|
||
"ISA_HAS_DSPR2"
|
||
"subqh.w\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_subqh_r_w"
|
||
@@ -503,7 +503,7 @@
|
||
UNSPEC_SUBQH_R_W))]
|
||
"ISA_HAS_DSPR2"
|
||
"subqh_r.w\t%0,%z1,%z2"
|
||
- [(set_attr "type" "arith")
|
||
+ [(set_attr "type" "dspalu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpax_w_ph"
|
||
@@ -514,7 +514,7 @@
|
||
UNSPEC_DPAX_W_PH))]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dpax.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpsx_w_ph"
|
||
@@ -525,7 +525,7 @@
|
||
UNSPEC_DPSX_W_PH))]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dpsx.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpaqx_s_w_ph"
|
||
@@ -540,7 +540,7 @@
|
||
UNSPEC_DPAQX_S_W_PH))])]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dpaqx_s.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpaqx_sa_w_ph"
|
||
@@ -555,7 +555,7 @@
|
||
UNSPEC_DPAQX_SA_W_PH))])]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dpaqx_sa.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmacsat")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpsqx_s_w_ph"
|
||
@@ -570,7 +570,7 @@
|
||
UNSPEC_DPSQX_S_W_PH))])]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dpsqx_s.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmac")
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "mips_dpsqx_sa_w_ph"
|
||
@@ -585,5 +585,43 @@
|
||
UNSPEC_DPSQX_SA_W_PH))])]
|
||
"ISA_HAS_DSPR2 && !TARGET_64BIT"
|
||
"dpsqx_sa.w.ph\t%q0,%z2,%z3"
|
||
- [(set_attr "type" "imadd")
|
||
+ [(set_attr "type" "dspmacsat")
|
||
+ (set_attr "mode" "SI")])
|
||
+
|
||
+;; Convert mtlo $ac[1-3],$0 => mult $ac[1-3],$0,$0
|
||
+;; mthi $ac[1-3],$0
|
||
+(define_peephole2
|
||
+ [(set (match_operand:SI 0 "register_operand" "")
|
||
+ (const_int 0))
|
||
+ (set (match_operand:SI 1 "register_operand" "")
|
||
+ (const_int 0))]
|
||
+ "ISA_HAS_DSPR2
|
||
+ && !TARGET_MIPS16
|
||
+ && !TARGET_64BIT
|
||
+ && (((true_regnum (operands[0]) == AC1LO_REGNUM
|
||
+ && true_regnum (operands[1]) == AC1HI_REGNUM)
|
||
+ || (true_regnum (operands[0]) == AC1HI_REGNUM
|
||
+ && true_regnum (operands[1]) == AC1LO_REGNUM))
|
||
+ || ((true_regnum (operands[0]) == AC2LO_REGNUM
|
||
+ && true_regnum (operands[1]) == AC2HI_REGNUM)
|
||
+ || (true_regnum (operands[0]) == AC2HI_REGNUM
|
||
+ && true_regnum (operands[1]) == AC2LO_REGNUM))
|
||
+ || ((true_regnum (operands[0]) == AC3LO_REGNUM
|
||
+ && true_regnum (operands[1]) == AC3HI_REGNUM)
|
||
+ || (true_regnum (operands[0]) == AC3HI_REGNUM
|
||
+ && true_regnum (operands[1]) == AC3LO_REGNUM)))"
|
||
+ [(parallel [(set (match_dup 0) (const_int 0))
|
||
+ (set (match_dup 1) (const_int 0))])]
|
||
+)
|
||
+
|
||
+(define_insn "*mips_acc_init"
|
||
+ [(parallel [(set (match_operand:SI 0 "register_operand" "=a")
|
||
+ (const_int 0))
|
||
+ (set (match_operand:SI 1 "register_operand" "=a")
|
||
+ (const_int 0))])]
|
||
+ "ISA_HAS_DSPR2
|
||
+ && !TARGET_MIPS16
|
||
+ && !TARGET_64BIT"
|
||
+ "mult\t%q0,$0,$0\t\t# Clear ACC HI/LO"
|
||
+ [(set_attr "type" "imul")
|
||
(set_attr "mode" "SI")])
|
||
--- a/gcc/config/mips/mips.h
|
||
+++ b/gcc/config/mips/mips.h
|
||
@@ -342,6 +342,9 @@
|
||
#define TARGET_IRIX 0
|
||
#define TARGET_IRIX6 0
|
||
|
||
+/* SDE specific stuff. */
|
||
+#define TARGET_MIPS_SDE 0
|
||
+
|
||
/* Define preprocessor macros for the -march and -mtune options.
|
||
PREFIX is either _MIPS_ARCH or _MIPS_TUNE, INFO is the selected
|
||
processor. If INFO's canonical name is "foo", define PREFIX to
|
||
@@ -708,8 +711,9 @@
|
||
|march=r10000|march=r12000|march=r14000|march=r16000:-mips4} \
|
||
%{march=mips32|march=4kc|march=4km|march=4kp|march=4ksc:-mips32} \
|
||
%{march=mips32r2|march=m4k|march=4ke*|march=4ksd|march=24k* \
|
||
- |march=34k*|march=74k*: -mips32r2} \
|
||
- %{march=mips64|march=5k*|march=20k*|march=sb1*|march=sr71000: -mips64} \
|
||
+ |march=34k*|march=74k*|march=1004k*: -mips32r2} \
|
||
+ %{march=mips64|march=5k*|march=20k*|march=sb1*|march=sr71000 \
|
||
+ |march=xlr: -mips64} \
|
||
%{march=mips64r2|march=octeon: -mips64r2} \
|
||
%{!march=*: -" MULTILIB_ISA_DEFAULT "}}"
|
||
|
||
@@ -720,7 +724,8 @@
|
||
#define MIPS_ARCH_FLOAT_SPEC \
|
||
"%{mhard-float|msoft-float|march=mips*:; \
|
||
march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \
|
||
- |march=34kc|march=74kc|march=5kc|march=octeon: -msoft-float; \
|
||
+ |march=34kc|march=74kc|march=1004kc|march=5kc \
|
||
+ |march=octeon|march=xlr: -msoft-float; \
|
||
march=*: -mhard-float}"
|
||
|
||
/* A spec condition that matches 32-bit options. It only works if
|
||
@@ -731,8 +736,9 @@
|
||
|
||
/* Support for a compile-time default CPU, et cetera. The rules are:
|
||
--with-arch is ignored if -march is specified or a -mips is specified
|
||
- (other than -mips16).
|
||
- --with-tune is ignored if -mtune is specified.
|
||
+ (other than -mips16); likewise --with-arch-32 and --with-arch-64.
|
||
+ --with-tune is ignored if -mtune is specified; likewise
|
||
+ --with-tune-32 and --with-tune-64.
|
||
--with-abi is ignored if -mabi is specified.
|
||
--with-float is ignored if -mhard-float or -msoft-float are
|
||
specified.
|
||
@@ -740,7 +746,11 @@
|
||
specified. */
|
||
#define OPTION_DEFAULT_SPECS \
|
||
{"arch", "%{" MIPS_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}" }, \
|
||
+ {"arch_32", "%{!mabi=*|mabi=32:%{" MIPS_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \
|
||
+ {"arch_64", "%{mabi=n32|mabi=64:%{" MIPS_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \
|
||
{"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
|
||
+ {"tune_32", "%{!mabi=*|mabi=32:%{!mtune=*:-mtune=%(VALUE)}}" }, \
|
||
+ {"tune_64", "%{mabi=n32|mabi=64:%{!mtune=*:-mtune=%(VALUE)}}" }, \
|
||
{"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \
|
||
{"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \
|
||
{"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \
|
||
@@ -750,7 +760,7 @@
|
||
|
||
/* A spec that infers the -mdsp setting from an -march argument. */
|
||
#define BASE_DRIVER_SELF_SPECS \
|
||
- "%{!mno-dsp:%{march=24ke*|march=34k*|march=74k*: -mdsp}}"
|
||
+ "%{!mno-dsp:%{march=24ke*|march=34k*|march=74k*|march=1004k*: -mdsp}}"
|
||
|
||
#define DRIVER_SELF_SPECS BASE_DRIVER_SELF_SPECS
|
||
|
||
@@ -1038,6 +1048,11 @@
|
||
/* ISA includes the bbit* instructions. */
|
||
#define ISA_HAS_BBIT (TARGET_OCTEON && !TARGET_MIPS16)
|
||
|
||
+/* ISA has single-instruction unaligned load/store support. */
|
||
+#define ISA_HAS_UL_US (TARGET_OCTEON \
|
||
+ && TARGET_OCTEON_UNALIGNED \
|
||
+ && !TARGET_MIPS16)
|
||
+
|
||
/* ISA includes the cins instruction. */
|
||
#define ISA_HAS_CINS (TARGET_OCTEON && !TARGET_MIPS16)
|
||
|
||
@@ -1055,6 +1070,7 @@
|
||
|
||
/* The CACHE instruction is available. */
|
||
#define ISA_HAS_CACHE (TARGET_CACHE_BUILTIN && !TARGET_MIPS16)
|
||
+
|
||
|
||
/* Add -G xx support. */
|
||
|
||
@@ -1152,6 +1168,7 @@
|
||
%{mshared} %{mno-shared} \
|
||
%{msym32} %{mno-sym32} \
|
||
%{mtune=*} %{v} \
|
||
+%{mocteon-useun} %{mno-octeon-useun} \
|
||
%(subtarget_asm_spec)"
|
||
|
||
/* Extra switches sometimes passed to the linker. */
|
||
@@ -1622,6 +1639,9 @@
|
||
#define GP_REG_LAST 31
|
||
#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
|
||
#define GP_DBX_FIRST 0
|
||
+#define K0_REG_NUM (GP_REG_FIRST + 26)
|
||
+#define K1_REG_NUM (GP_REG_FIRST + 27)
|
||
+#define KERNEL_REG_P(REGNO) (IN_RANGE (REGNO, K0_REG_NUM, K1_REG_NUM))
|
||
|
||
#define FP_REG_FIRST 32
|
||
#define FP_REG_LAST 63
|
||
@@ -1649,6 +1669,10 @@
|
||
#define COP0_REG_LAST 111
|
||
#define COP0_REG_NUM (COP0_REG_LAST - COP0_REG_FIRST + 1)
|
||
|
||
+#define COP0_STATUS_REG_NUM (COP0_REG_FIRST + 12)
|
||
+#define COP0_CAUSE_REG_NUM (COP0_REG_FIRST + 13)
|
||
+#define COP0_EPC_REG_NUM (COP0_REG_FIRST + 14)
|
||
+
|
||
#define COP2_REG_FIRST 112
|
||
#define COP2_REG_LAST 143
|
||
#define COP2_REG_NUM (COP2_REG_LAST - COP2_REG_FIRST + 1)
|
||
@@ -1666,6 +1690,29 @@
|
||
#define AT_REGNUM (GP_REG_FIRST + 1)
|
||
#define HI_REGNUM (TARGET_BIG_ENDIAN ? MD_REG_FIRST : MD_REG_FIRST + 1)
|
||
#define LO_REGNUM (TARGET_BIG_ENDIAN ? MD_REG_FIRST + 1 : MD_REG_FIRST)
|
||
+#define AC1HI_REGNUM (TARGET_BIG_ENDIAN \
|
||
+ ? DSP_ACC_REG_FIRST : DSP_ACC_REG_FIRST + 1)
|
||
+#define AC1LO_REGNUM (TARGET_BIG_ENDIAN \
|
||
+ ? DSP_ACC_REG_FIRST + 1 : DSP_ACC_REG_FIRST)
|
||
+#define AC2HI_REGNUM (TARGET_BIG_ENDIAN \
|
||
+ ? DSP_ACC_REG_FIRST + 2 : DSP_ACC_REG_FIRST + 3)
|
||
+#define AC2LO_REGNUM (TARGET_BIG_ENDIAN \
|
||
+ ? DSP_ACC_REG_FIRST + 3 : DSP_ACC_REG_FIRST + 2)
|
||
+#define AC3HI_REGNUM (TARGET_BIG_ENDIAN \
|
||
+ ? DSP_ACC_REG_FIRST + 4 : DSP_ACC_REG_FIRST + 5)
|
||
+#define AC3LO_REGNUM (TARGET_BIG_ENDIAN \
|
||
+ ? DSP_ACC_REG_FIRST + 5 : DSP_ACC_REG_FIRST + 4)
|
||
+
|
||
+/* A few bitfield locations for the coprocessor registers. */
|
||
+/* Request Interrupt Priority Level is from bit 10 to bit 15 of
|
||
+ the cause register for the EIC interrupt mode. */
|
||
+#define CAUSE_IPL 10
|
||
+/* Interrupt Priority Level is from bit 10 to bit 15 of the status register. */
|
||
+#define SR_IPL 10
|
||
+/* Exception Level is at bit 1 of the status register. */
|
||
+#define SR_EXL 1
|
||
+/* Interrupt Enable is at bit 0 of the status register. */
|
||
+#define SR_IE 0
|
||
|
||
/* FPSW_REGNUM is the single condition code used if !ISA_HAS_8CC.
|
||
If ISA_HAS_8CC, it should not be used, and an arbitrary ST_REG
|
||
@@ -1754,11 +1801,18 @@
|
||
incoming arguments, the static chain pointer, or the frame pointer.
|
||
The epilogue temporary mustn't conflict with the return registers,
|
||
the PIC call register ($25), the frame pointer, the EH stack adjustment,
|
||
- or the EH data registers. */
|
||
+ or the EH data registers.
|
||
+
|
||
+ If we're generating interrupt handlers, we use K0 as a temporary register
|
||
+ in prologue/epilogue code. */
|
||
|
||
#define MIPS16_PIC_TEMP_REGNUM (GP_REG_FIRST + 2)
|
||
-#define MIPS_PROLOGUE_TEMP_REGNUM (GP_REG_FIRST + 3)
|
||
-#define MIPS_EPILOGUE_TEMP_REGNUM (GP_REG_FIRST + (TARGET_MIPS16 ? 6 : 8))
|
||
+#define MIPS_PROLOGUE_TEMP_REGNUM \
|
||
+ (cfun->machine->interrupt_handler_p ? K0_REG_NUM : GP_REG_FIRST + 3)
|
||
+#define MIPS_EPILOGUE_TEMP_REGNUM \
|
||
+ (cfun->machine->interrupt_handler_p \
|
||
+ ? K0_REG_NUM \
|
||
+ : GP_REG_FIRST + (TARGET_MIPS16 ? 6 : 8))
|
||
|
||
#define MIPS16_PIC_TEMP gen_rtx_REG (Pmode, MIPS16_PIC_TEMP_REGNUM)
|
||
#define MIPS_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, MIPS_PROLOGUE_TEMP_REGNUM)
|
||
@@ -2284,14 +2338,7 @@
|
||
(mips_abi == ABI_EABI && UNITS_PER_FPVALUE >= UNITS_PER_DOUBLE)
|
||
|
||
|
||
-/* Say that the epilogue uses the return address register. Note that
|
||
- in the case of sibcalls, the values "used by the epilogue" are
|
||
- considered live at the start of the called function.
|
||
-
|
||
- If using a GOT, say that the epilogue also uses GOT_VERSION_REGNUM.
|
||
- See the comment above load_call<mode> for details. */
|
||
-#define EPILOGUE_USES(REGNO) \
|
||
- ((REGNO) == 31 || (TARGET_USE_GOT && (REGNO) == GOT_VERSION_REGNUM))
|
||
+#define EPILOGUE_USES(REGNO) mips_epilogue_uses (REGNO)
|
||
|
||
/* Treat LOC as a byte offset from the stack pointer and round it up
|
||
to the next fully-aligned offset. */
|
||
--- a/gcc/config/mips/mips.md
|
||
+++ b/gcc/config/mips/mips.md
|
||
@@ -67,7 +67,16 @@
|
||
(UNSPEC_SET_GOT_VERSION 46)
|
||
(UNSPEC_UPDATE_GOT_VERSION 47)
|
||
(UNSPEC_COPYGP 48)
|
||
+ (UNSPEC_ERET 49)
|
||
+ (UNSPEC_DERET 50)
|
||
+ (UNSPEC_DI 51)
|
||
+ (UNSPEC_EHB 52)
|
||
+ (UNSPEC_RDPGPR 53)
|
||
+ (UNSPEC_COP0 54)
|
||
|
||
+ (UNSPEC_UNALIGNED_LOAD 60)
|
||
+ (UNSPEC_UNALIGNED_STORE 61)
|
||
+
|
||
(UNSPEC_ADDRESS_FIRST 100)
|
||
|
||
(TLS_GET_TP_REGNUM 3)
|
||
@@ -372,6 +381,12 @@
|
||
;; frsqrt floating point reciprocal square root
|
||
;; frsqrt1 floating point reciprocal square root step1
|
||
;; frsqrt2 floating point reciprocal square root step2
|
||
+;; dspmac DSP MAC instructions not saturating the accumulator
|
||
+;; dspmacsat DSP MAC instructions that saturate the accumulator
|
||
+;; accext DSP accumulator extract instructions
|
||
+;; accmod DSP accumulator modify instructions
|
||
+;; dspalu DSP ALU instructions not saturating the result
|
||
+;; dspalusat DSP ALU instructions that saturate the result
|
||
;; multi multiword sequence (or user asm statements)
|
||
;; nop no operation
|
||
;; ghost an instruction that produces no real code
|
||
@@ -380,7 +395,7 @@
|
||
prefetch,prefetchx,condmove,mtc,mfc,mthilo,mfhilo,const,arith,logical,
|
||
shift,slt,signext,clz,pop,trap,imul,imul3,imul3nc,imadd,idiv,idiv3,move,
|
||
fmove,fadd,fmul,fmadd,fdiv,frdiv,frdiv1,frdiv2,fabs,fneg,fcmp,fcvt,fsqrt,
|
||
- frsqrt,frsqrt1,frsqrt2,multi,nop,ghost"
|
||
+ frsqrt,frsqrt1,frsqrt2,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat,multi,nop,ghost"
|
||
(cond [(eq_attr "jal" "!unset") (const_string "call")
|
||
(eq_attr "got" "load") (const_string "load")
|
||
|
||
@@ -3565,7 +3580,9 @@
|
||
(unspec:GPR [(match_operand:BLK 1 "memory_operand" "m")
|
||
(match_operand:QI 2 "memory_operand" "m")]
|
||
UNSPEC_LOAD_LEFT))]
|
||
- "!TARGET_MIPS16 && mips_mem_fits_mode_p (<MODE>mode, operands[1])"
|
||
+ "!TARGET_MIPS16
|
||
+ && !ISA_HAS_UL_US
|
||
+ && mips_mem_fits_mode_p (<MODE>mode, operands[1])"
|
||
"<load>l\t%0,%2"
|
||
[(set_attr "move_type" "load")
|
||
(set_attr "mode" "<MODE>")])
|
||
@@ -3576,7 +3593,9 @@
|
||
(match_operand:QI 2 "memory_operand" "m")
|
||
(match_operand:GPR 3 "register_operand" "0")]
|
||
UNSPEC_LOAD_RIGHT))]
|
||
- "!TARGET_MIPS16 && mips_mem_fits_mode_p (<MODE>mode, operands[1])"
|
||
+ "!TARGET_MIPS16
|
||
+ && !ISA_HAS_UL_US
|
||
+ && mips_mem_fits_mode_p (<MODE>mode, operands[1])"
|
||
"<load>r\t%0,%2"
|
||
[(set_attr "move_type" "load")
|
||
(set_attr "mode" "<MODE>")])
|
||
@@ -3586,7 +3605,9 @@
|
||
(unspec:BLK [(match_operand:GPR 1 "reg_or_0_operand" "dJ")
|
||
(match_operand:QI 2 "memory_operand" "m")]
|
||
UNSPEC_STORE_LEFT))]
|
||
- "!TARGET_MIPS16 && mips_mem_fits_mode_p (<MODE>mode, operands[0])"
|
||
+ "!TARGET_MIPS16
|
||
+ && !ISA_HAS_UL_US
|
||
+ && mips_mem_fits_mode_p (<MODE>mode, operands[0])"
|
||
"<store>l\t%z1,%2"
|
||
[(set_attr "move_type" "store")
|
||
(set_attr "mode" "<MODE>")])
|
||
@@ -3602,6 +3623,28 @@
|
||
[(set_attr "move_type" "store")
|
||
(set_attr "mode" "<MODE>")])
|
||
|
||
+;; Unaligned load and store patterns.
|
||
+
|
||
+(define_insn "mov_u<load>"
|
||
+ [(set (match_operand:GPR 0 "register_operand" "=d")
|
||
+ (unspec:GPR [(match_operand:BLK 1 "memory_operand" "m")
|
||
+ (match_operand:QI 2 "memory_operand" "m")]
|
||
+ UNSPEC_UNALIGNED_LOAD))]
|
||
+ "ISA_HAS_UL_US && mips_mem_fits_mode_p (<MODE>mode, operands[1])"
|
||
+ "u<load>\t%0,%2"
|
||
+ [(set_attr "type" "load")
|
||
+ (set_attr "mode" "<MODE>")])
|
||
+
|
||
+(define_insn "mov_u<store>"
|
||
+ [(set (match_operand:BLK 0 "memory_operand" "=m")
|
||
+ (unspec:BLK [(match_operand:GPR 1 "reg_or_0_operand" "dJ")
|
||
+ (match_operand:QI 2 "memory_operand" "m")]
|
||
+ UNSPEC_UNALIGNED_STORE))]
|
||
+ "ISA_HAS_UL_US && mips_mem_fits_mode_p (<MODE>mode, operands[0])"
|
||
+ "u<store>\t%z1,%2"
|
||
+ [(set_attr "type" "store")
|
||
+ (set_attr "mode" "<MODE>")])
|
||
+
|
||
;; An instruction to calculate the high part of a 64-bit SYMBOL_ABSOLUTE.
|
||
;; The required value is:
|
||
;;
|
||
@@ -5472,6 +5515,26 @@
|
||
return "%*b\t%l0%/";
|
||
else
|
||
{
|
||
+ if (final_sequence && (mips_abi == ABI_32 || mips_abi == ABI_O64))
|
||
+ {
|
||
+ /* If the delay slot contains a $gp restore, we need to
|
||
+ do that first, because we need it for the load
|
||
+ label. Other ABIs do not have caller-save $gp. */
|
||
+ rtx next = NEXT_INSN (insn);
|
||
+ if (INSN_P (next) && !INSN_DELETED_P (next))
|
||
+ {
|
||
+ rtx pat = PATTERN (next);
|
||
+ if (GET_CODE (pat) == SET
|
||
+ && REG_P (SET_DEST (pat))
|
||
+ && REGNO (SET_DEST (pat)) == PIC_OFFSET_TABLE_REGNUM)
|
||
+ {
|
||
+ rtx ops[2];
|
||
+ ops[0] = SET_DEST (pat);
|
||
+ ops[1] = SET_SRC (pat);
|
||
+ output_asm_insn (mips_output_move (ops[0], ops[1]), ops);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
output_asm_insn (mips_output_load_label (), operands);
|
||
return "%*jr\t%@%/%]";
|
||
}
|
||
@@ -5490,7 +5553,13 @@
|
||
(lt (abs (minus (match_dup 0)
|
||
(plus (pc) (const_int 4))))
|
||
(const_int 131072)))
|
||
- (const_int 4) (const_int 16)))])
|
||
+ (const_int 4)
|
||
+ (if_then_else
|
||
+ ;; for these two ABIs we may need to move a restore of $gp
|
||
+ (ior (eq (symbol_ref "mips_abi") (symbol_ref "ABI_32"))
|
||
+ (eq (symbol_ref "mips_abi") (symbol_ref "ABI_O64")))
|
||
+ (const_int 20)
|
||
+ (const_int 16))))])
|
||
|
||
;; We need a different insn for the mips16, because a mips16 branch
|
||
;; does not have a delay slot.
|
||
@@ -5679,6 +5748,60 @@
|
||
[(set_attr "type" "jump")
|
||
(set_attr "mode" "none")])
|
||
|
||
+;; Exception return.
|
||
+(define_insn "mips_eret"
|
||
+ [(return)
|
||
+ (unspec_volatile [(const_int 0)] UNSPEC_ERET)]
|
||
+ ""
|
||
+ "eret"
|
||
+ [(set_attr "type" "trap")
|
||
+ (set_attr "mode" "none")])
|
||
+
|
||
+;; Debug exception return.
|
||
+(define_insn "mips_deret"
|
||
+ [(return)
|
||
+ (unspec_volatile [(const_int 0)] UNSPEC_DERET)]
|
||
+ ""
|
||
+ "deret"
|
||
+ [(set_attr "type" "trap")
|
||
+ (set_attr "mode" "none")])
|
||
+
|
||
+;; Disable interrupts.
|
||
+(define_insn "mips_di"
|
||
+ [(unspec_volatile [(const_int 0)] UNSPEC_DI)]
|
||
+ ""
|
||
+ "di"
|
||
+ [(set_attr "type" "trap")
|
||
+ (set_attr "mode" "none")])
|
||
+
|
||
+;; Execution hazard barrier.
|
||
+(define_insn "mips_ehb"
|
||
+ [(unspec_volatile [(const_int 0)] UNSPEC_EHB)]
|
||
+ ""
|
||
+ "ehb"
|
||
+ [(set_attr "type" "trap")
|
||
+ (set_attr "mode" "none")])
|
||
+
|
||
+;; Read GPR from previous shadow register set.
|
||
+(define_insn "mips_rdpgpr"
|
||
+ [(set (match_operand:SI 0 "register_operand" "=d")
|
||
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "d")]
|
||
+ UNSPEC_RDPGPR))]
|
||
+ ""
|
||
+ "rdpgpr\t%0,%1"
|
||
+ [(set_attr "type" "move")
|
||
+ (set_attr "mode" "SI")])
|
||
+
|
||
+;; Move involving COP0 registers.
|
||
+(define_insn "cop0_move"
|
||
+ [(set (match_operand:SI 0 "register_operand" "=B,d")
|
||
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "d,B")]
|
||
+ UNSPEC_COP0))]
|
||
+ ""
|
||
+{ return mips_output_move (operands[0], operands[1]); }
|
||
+ [(set_attr "type" "mtc,mfc")
|
||
+ (set_attr "mode" "SI")])
|
||
+
|
||
;; This is used in compiling the unwind routines.
|
||
(define_expand "eh_return"
|
||
[(use (match_operand 0 "general_operand"))]
|
||
--- a/gcc/config/mips/mips.opt
|
||
+++ b/gcc/config/mips/mips.opt
|
||
@@ -184,6 +184,10 @@
|
||
Target Report RejectNegative Mask(MIPS16)
|
||
Generate MIPS16 code
|
||
|
||
+mips16e
|
||
+Target Report RejectNegative Mask(MIPS16) MaskExists
|
||
+Deprecated; alias for -mips16
|
||
+
|
||
mips3d
|
||
Target Report RejectNegative Mask(MIPS3D)
|
||
Use MIPS-3D instructions
|
||
@@ -236,6 +240,10 @@
|
||
Target Report RejectNegative InverseMask(MIPS3D)
|
||
Do not use MIPS-3D instructions
|
||
|
||
+mocteon-useun
|
||
+Target Report Mask(OCTEON_UNALIGNED)
|
||
+Use Octeon-specific unaligned loads/stores for 32/64-bit data
|
||
+
|
||
mpaired-single
|
||
Target Report Mask(PAIRED_SINGLE_FLOAT)
|
||
Use paired-single floating-point instructions
|
||
--- a/gcc/config/mips/mips-protos.h
|
||
+++ b/gcc/config/mips/mips-protos.h
|
||
@@ -261,6 +261,8 @@
|
||
extern void mips_output_external (FILE *, tree, const char *);
|
||
extern void mips_output_filename (FILE *, const char *);
|
||
extern void mips_output_ascii (FILE *, const char *, size_t);
|
||
+extern void octeon_output_shared_variable (FILE *, tree, const char *,
|
||
+ unsigned HOST_WIDE_INT, int);
|
||
extern void mips_output_aligned_decl_common (FILE *, tree, const char *,
|
||
unsigned HOST_WIDE_INT,
|
||
unsigned int);
|
||
@@ -307,6 +309,8 @@
|
||
extern bool mips_linked_madd_p (rtx, rtx);
|
||
extern bool mips_store_data_bypass_p (rtx, rtx);
|
||
extern rtx mips_prefetch_cookie (rtx, rtx);
|
||
+extern int mips_mult_madd_chain_bypass_p (rtx, rtx);
|
||
+extern int mips_dspalu_bypass_p (rtx, rtx);
|
||
|
||
extern void irix_asm_output_align (FILE *, unsigned);
|
||
extern const char *current_section_name (void);
|
||
@@ -332,4 +336,6 @@
|
||
|
||
extern void mips_expand_vector_init (rtx, rtx);
|
||
|
||
+extern bool mips_epilogue_uses (unsigned int);
|
||
+
|
||
#endif /* ! GCC_MIPS_PROTOS_H */
|
||
--- /dev/null
|
||
+++ b/gcc/config/mips/octeon-elf.h
|
||
@@ -0,0 +1,98 @@
|
||
+/* Macros for mips*-octeon-elf target.
|
||
+ Copyright (C) 2004, 2005, 2006 Cavium Networks.
|
||
+
|
||
+This file is part of GCC.
|
||
+
|
||
+GCC is free software; you can redistribute it and/or modify
|
||
+it under the terms of the GNU General Public License as published by
|
||
+the Free Software Foundation; either version 2, or (at your option)
|
||
+any later version.
|
||
+
|
||
+GCC is distributed in the hope that it will be useful,
|
||
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+GNU General Public License for more details.
|
||
+
|
||
+You should have received a copy of the GNU General Public License
|
||
+along with GCC; see the file COPYING. If not, write to
|
||
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
+Boston, MA 02110-1301, USA. */
|
||
+
|
||
+/* Add MASK_SOFT_FLOAT and MASK_OCTEON_UNALIGNED. */
|
||
+
|
||
+#undef TARGET_DEFAULT
|
||
+#define TARGET_DEFAULT (MASK_SOFT_FLOAT_ABI | MASK_OCTEON_UNALIGNED)
|
||
+
|
||
+/* Forward -m*octeon-useun. */
|
||
+
|
||
+#undef SUBTARGET_ASM_SPEC
|
||
+#define SUBTARGET_ASM_SPEC "%{mno-octeon-useun} %{!mno-octeon-useun:-mocteon-useun}"
|
||
+
|
||
+/* Enable backtrace including on machine exceptions by default. */
|
||
+
|
||
+#undef SUBTARGET_CC1_SPEC
|
||
+#define SUBTARGET_CC1_SPEC "%{!fno-asynchronous-unwind-tables:-fasynchronous-unwind-tables}"
|
||
+
|
||
+/* Without ASM_PREFERRED_EH_DATA_FORMAT, output_call_frame_info emits
|
||
+ pointer-sized addresses for FDE addresses. For 64-bit targets, it does
|
||
+ it without properly "switching over" to 64-bit as described in the DWARF3
|
||
+ spec. GDB can fall back on .eh_frames and misinterpret FDE addresses.
|
||
+ Instead let's be explicit and use augmentation to describe the encoding if
|
||
+ pointer size is 64. */
|
||
+
|
||
+#undef ASM_PREFERRED_EH_DATA_FORMAT
|
||
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
|
||
+ ((CODE) == 1 && POINTER_SIZE == 64 \
|
||
+ ? (ABI_HAS_64BIT_SYMBOLS ? DW_EH_PE_udata8 : DW_EH_PE_udata4) \
|
||
+ : DW_EH_PE_absptr)
|
||
+
|
||
+/* Link to libc library. */
|
||
+
|
||
+#undef LIB_SPEC
|
||
+#define LIB_SPEC "-lc"
|
||
+
|
||
+/* Link to startup file. */
|
||
+
|
||
+#undef STARTFILE_SPEC
|
||
+#define STARTFILE_SPEC "crti%O%s crtbegin%O%s crt0%O%s"
|
||
+
|
||
+/* Default our test-only n64 configuration to -G0 since that is what
|
||
+ the kernel uses. */
|
||
+
|
||
+#undef SUBTARGET_SELF_SPECS
|
||
+#define SUBTARGET_SELF_SPECS \
|
||
+"%{mabi=64:%{!G*: -G0}}"
|
||
+
|
||
+/* Pass linker emulation mode for N32. */
|
||
+
|
||
+#undef LINK_SPEC
|
||
+#define LINK_SPEC "\
|
||
+%(endian_spec) \
|
||
+%{G*} %{mips1} %{mips2} %{mips3} %{mips4} %{mips32} %{mips32r2} %{mips64} \
|
||
+%{mips64r2} %{bestGnum} %{shared} %{non_shared} \
|
||
+%{mabi=n32:-melf32e%{!EL:b}%{EL:l}octeonn32} \
|
||
+%{mabi=64:-melf64e%{!EL:b}%{EL:l}octeon}"
|
||
+
|
||
+/* Override because of N32. */
|
||
+
|
||
+#undef LOCAL_LABEL_PREFIX
|
||
+#define LOCAL_LABEL_PREFIX ((mips_abi == ABI_N32) ? "." : "$")
|
||
+
|
||
+/* Append the core number to the GCOV filename FN. */
|
||
+
|
||
+#define GCOV_TARGET_SUFFIX_LENGTH 2
|
||
+#define ADD_GCOV_TARGET_SUFFIX(FN) \
|
||
+do \
|
||
+ { \
|
||
+ char *fn = FN; \
|
||
+ int core; \
|
||
+ char s[3]; \
|
||
+ \
|
||
+ asm ("rdhwr %0, $0" : "=r"(core)); \
|
||
+ sprintf (s, "%d", core); \
|
||
+ strcat (fn, s); \
|
||
+ } \
|
||
+while (0)
|
||
+
|
||
+/* Code to unwind through the exception frame. */
|
||
+#define MD_UNWIND_SUPPORT "config/mips/octeon-elf-unwind.h"
|
||
--- /dev/null
|
||
+++ b/gcc/config/mips/octeon-elf-unwind.h
|
||
@@ -0,0 +1,57 @@
|
||
+/* Stack unwinding support through the first exception frame.
|
||
+ Copyright (C) 2007 Cavium Networks.
|
||
+
|
||
+This file is part of GCC.
|
||
+
|
||
+GCC is free software; you can redistribute it and/or modify
|
||
+it under the terms of the GNU General Public License as published by
|
||
+the Free Software Foundation; either version 2, or (at your option)
|
||
+any later version.
|
||
+
|
||
+GCC is distributed in the hope that it will be useful,
|
||
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+GNU General Public License for more details.
|
||
+
|
||
+You should have received a copy of the GNU General Public License
|
||
+along with GCC; see the file COPYING. If not, write to
|
||
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
+Boston, MA 02110-1301, USA. */
|
||
+
|
||
+#define MD_FALLBACK_FRAME_STATE_FOR octeon_elf_fallback_frame_state
|
||
+
|
||
+/* Check whether this is the cvmx_interrupt_stage2 frame. If the
|
||
+ function call was dispatched via k0 assume we are in
|
||
+ cvmx_interrupt_stage2. In this case the sp in point to the saved
|
||
+ register array. */
|
||
+
|
||
+static _Unwind_Reason_Code
|
||
+octeon_elf_fallback_frame_state (struct _Unwind_Context *context,
|
||
+ _Unwind_FrameState *fs)
|
||
+{
|
||
+ unsigned i;
|
||
+ unsigned *pc = context->ra;
|
||
+
|
||
+ /* Look for "jalr k0". */
|
||
+ if (pc[-2] != 0x0340f809)
|
||
+ return _URC_END_OF_STACK;
|
||
+
|
||
+ for (i = 0; i < 32; i++)
|
||
+ {
|
||
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
|
||
+ fs->regs.reg[i].loc.offset = 8 * i;
|
||
+ }
|
||
+
|
||
+ /* Keep the next frame's sp. This way we have a CFA that points
|
||
+ exactly to the register array. */
|
||
+ fs->regs.cfa_how = CFA_REG_OFFSET;
|
||
+ fs->regs.cfa_reg = STACK_POINTER_REGNUM;
|
||
+ fs->regs.cfa_offset = 0;
|
||
+
|
||
+ /* DEPC is saved as the 35. register. */
|
||
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].how = REG_SAVED_OFFSET;
|
||
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].loc.offset = 8 * 35;
|
||
+ fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN;
|
||
+
|
||
+ return _URC_NO_REASON;
|
||
+}
|
||
--- /dev/null
|
||
+++ b/gcc/config/mips/octeon.h
|
||
@@ -0,0 +1,68 @@
|
||
+/* Macros for mips*-octeon-* target.
|
||
+ Copyright (C) 2004, 2005, 2006 Cavium Networks.
|
||
+
|
||
+This file is part of GCC.
|
||
+
|
||
+GCC is free software; you can redistribute it and/or modify
|
||
+it under the terms of the GNU General Public License as published by
|
||
+the Free Software Foundation; either version 2, or (at your option)
|
||
+any later version.
|
||
+
|
||
+GCC is distributed in the hope that it will be useful,
|
||
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+GNU General Public License for more details.
|
||
+
|
||
+You should have received a copy of the GNU General Public License
|
||
+along with GCC; see the file COPYING. If not, write to
|
||
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
+Boston, MA 02110-1301, USA. */
|
||
+
|
||
+#define CVMX_SHARED_BSS_FLAGS (SECTION_WRITE | SECTION_BSS)
|
||
+
|
||
+#undef TARGET_ASM_SELECT_SECTION
|
||
+#define TARGET_ASM_SELECT_SECTION octeon_select_section
|
||
+
|
||
+#undef TARGET_ASM_UNIQUE_SECTION
|
||
+#define TARGET_ASM_UNIQUE_SECTION octeon_unique_section
|
||
+
|
||
+/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL. This differs from the
|
||
+ generic version only in the use of cvmx_shared attribute. */
|
||
+
|
||
+#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL
|
||
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGN) \
|
||
+ do \
|
||
+ { \
|
||
+ if ((DECL) && TREE_CODE ((DECL)) == VAR_DECL \
|
||
+ && lookup_attribute ("cvmx_shared", DECL_ATTRIBUTES (DECL))) \
|
||
+ { \
|
||
+ fprintf ((STREAM), "%s", LOCAL_ASM_OP); \
|
||
+ assemble_name ((STREAM), (NAME)); \
|
||
+ fprintf ((STREAM), "\n"); \
|
||
+ octeon_output_shared_variable ((STREAM), (DECL), (NAME), \
|
||
+ (SIZE), (ALIGN)); \
|
||
+ } \
|
||
+ else \
|
||
+ ASM_OUTPUT_ALIGNED_LOCAL (STREAM, NAME, SIZE, ALIGN); \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+
|
||
+/* Implement ASM_OUTPUT_ALIGNED_DECL_COMMON. This differs from the mips
|
||
+ version only in the use of cvmx_shared attribute. */
|
||
+
|
||
+#undef ASM_OUTPUT_ALIGNED_DECL_COMMON
|
||
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(STREAM, DECL, NAME, SIZE, ALIGN) \
|
||
+ { \
|
||
+ if (TREE_CODE ((DECL)) == VAR_DECL \
|
||
+ && lookup_attribute ("cvmx_shared", DECL_ATTRIBUTES ((DECL)))) \
|
||
+ { \
|
||
+ if (TREE_PUBLIC ((DECL)) && DECL_NAME ((DECL))) \
|
||
+ targetm.asm_out.globalize_label (asm_out_file, (NAME)); \
|
||
+ octeon_output_shared_variable ((STREAM), (DECL), (NAME), \
|
||
+ (SIZE), (ALIGN)); \
|
||
+ } \
|
||
+ else \
|
||
+ mips_output_aligned_decl_common ((STREAM), (DECL), (NAME), (SIZE), \
|
||
+ (ALIGN)); \
|
||
+ }
|
||
--- a/gcc/config/mips/predicates.md
|
||
+++ b/gcc/config/mips/predicates.md
|
||
@@ -211,6 +211,20 @@
|
||
}
|
||
})
|
||
|
||
+(define_predicate "mask_low_and_shift_operator"
|
||
+ (and (match_code "and")
|
||
+ (match_test "GET_CODE (XEXP (op, 0)) == ASHIFT
|
||
+ && GET_CODE (XEXP (op, 1)) == CONST_INT
|
||
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT"))
|
||
+{
|
||
+ int len;
|
||
+
|
||
+ len = mask_low_and_shift_len (GET_MODE (op),
|
||
+ INTVAL (XEXP (XEXP (op, 0), 1)),
|
||
+ INTVAL (XEXP (op, 1)));
|
||
+ return 0 < len && len <= 32;
|
||
+})
|
||
+
|
||
(define_predicate "consttable_operand"
|
||
(match_test "CONSTANT_P (op)"))
|
||
|
||
--- a/gcc/config/mips/sde.h
|
||
+++ b/gcc/config/mips/sde.h
|
||
@@ -19,6 +19,9 @@
|
||
along with GCC; see the file COPYING3. If not see
|
||
<http://www.gnu.org/licenses/>. */
|
||
|
||
+#undef TARGET_MIPS_SDE
|
||
+#define TARGET_MIPS_SDE 1
|
||
+
|
||
#undef DRIVER_SELF_SPECS
|
||
#define DRIVER_SELF_SPECS \
|
||
/* Make sure a -mips option is present. This helps us to pick \
|
||
@@ -90,7 +93,8 @@
|
||
|
||
/* Use $5 as a temporary for both MIPS16 and non-MIPS16. */
|
||
#undef MIPS_EPILOGUE_TEMP_REGNUM
|
||
-#define MIPS_EPILOGUE_TEMP_REGNUM (GP_REG_FIRST + 5)
|
||
+#define MIPS_EPILOGUE_TEMP_REGNUM \
|
||
+ (cfun->machine->interrupt_handler_p ? K0_REG_NUM : GP_REG_FIRST + 5)
|
||
|
||
/* Using long will always be right for size_t and ptrdiff_t, since
|
||
sizeof(long) must equal sizeof(void *), following from the setting
|
||
--- a/gcc/config/mips/sdemtk.h
|
||
+++ b/gcc/config/mips/sdemtk.h
|
||
@@ -19,6 +19,8 @@
|
||
along with GCC; see the file COPYING3. If not see
|
||
<http://www.gnu.org/licenses/>. */
|
||
|
||
+#define TARGET_MIPS_SDEMTK 1
|
||
+
|
||
#define TARGET_OS_CPP_BUILTINS() \
|
||
do \
|
||
{ \
|
||
@@ -113,3 +115,12 @@
|
||
/* ...nor does the call sequence preserve $31. */
|
||
#undef MIPS_SAVE_REG_FOR_PROFILING_P
|
||
#define MIPS_SAVE_REG_FOR_PROFILING_P(REGNO) ((REGNO) == GP_REG_FIRST + 31)
|
||
+
|
||
+/* From mips.h, with mno-float option added. */
|
||
+
|
||
+#undef MIPS_ARCH_FLOAT_SPEC
|
||
+#define MIPS_ARCH_FLOAT_SPEC \
|
||
+ "%{mhard-float|msoft-float|mno-float|march=mips*:; \
|
||
+ march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \
|
||
+ |march=34kc|march=74kc|march=1004kc|march=5kc|march=octeon|march=xlr: -msoft-float; \
|
||
+ march=*: -mhard-float}"
|
||
--- /dev/null
|
||
+++ b/gcc/config/mips/t-crtfm
|
||
@@ -0,0 +1,9 @@
|
||
+
|
||
+EXTRA_MULTILIB_PARTS += crtfastmath.o
|
||
+
|
||
+EXTRA_PARTS += crtfastmath.o
|
||
+
|
||
+$(T)crtfastmath.o: $(srcdir)/config/mips/crtfastmath.c $(GCC_PASSES)
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
|
||
+ -c -o $(T)crtfastmath.o $(srcdir)/config/mips/crtfastmath.c
|
||
+
|
||
--- /dev/null
|
||
+++ b/gcc/config/mips/t-octeon-elf
|
||
@@ -0,0 +1,41 @@
|
||
+# Don't let CTOR_LIST end up in sdata section.
|
||
+
|
||
+CRTSTUFF_T_CFLAGS = -G 0 -fno-asynchronous-unwind-tables
|
||
+
|
||
+# Assemble startup files.
|
||
+
|
||
+$(T)crti.o: $(srcdir)/config/mips/crti.asm $(GCC_PASSES)
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
|
||
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/mips/crti.asm
|
||
+
|
||
+$(T)crtn.o: $(srcdir)/config/mips/crtn.asm $(GCC_PASSES)
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
|
||
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/mips/crtn.asm
|
||
+
|
||
+# N32 uses TFmode for long double.
|
||
+
|
||
+TPBIT = tp-bit.c
|
||
+
|
||
+tp-bit.c: $(srcdir)/config/fp-bit.c
|
||
+ echo '#ifdef __MIPSEL__' > tp-bit.c
|
||
+ echo '# define FLOAT_BIT_ORDER_MISMATCH' >> tp-bit.c
|
||
+ echo '#endif' >> tp-bit.c
|
||
+ echo '#if __LDBL_MANT_DIG__ == 113' >> tp-bit.c
|
||
+ echo '#define QUIET_NAN_NEGATED' >> tp-bit.c
|
||
+ echo '# define TFLOAT' >> tp-bit.c
|
||
+ cat $(srcdir)/config/fp-bit.c >> tp-bit.c
|
||
+ echo '#endif' >> tp-bit.c
|
||
+
|
||
+# We must build libgcc2.a with -G 0, in case the user wants to link
|
||
+# without the $gp register.
|
||
+
|
||
+TARGET_LIBGCC2_CFLAGS = -G 0
|
||
+
|
||
+# Build both ABIs.
|
||
+
|
||
+MULTILIB_OPTIONS = mabi=n32/mabi=eabi/mabi=64
|
||
+MULTILIB_DIRNAMES = n32 eabi n64
|
||
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
|
||
+
|
||
+LIBGCC = stmp-multilib
|
||
+INSTALL_LIBGCC = install-multilib
|
||
--- a/gcc/config/mips/xlr.md
|
||
+++ b/gcc/config/mips/xlr.md
|
||
@@ -1,5 +1,5 @@
|
||
;; DFA-based pipeline description for the XLR.
|
||
-;; Copyright (C) 2008 Free Software Foundation, Inc.
|
||
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
|
||
;;
|
||
;; xlr.md Machine Description for the RMI XLR Microprocessor
|
||
;; This file is part of GCC.
|
||
@@ -31,7 +31,7 @@
|
||
;; Integer arithmetic instructions.
|
||
(define_insn_reservation "ir_xlr_alu" 1
|
||
(and (eq_attr "cpu" "xlr")
|
||
- (eq_attr "type" "arith,shift,clz,const,unknown,multi,nop,trap"))
|
||
+ (eq_attr "type" "move,arith,shift,clz,logical,signext,const,unknown,multi,nop,trap"))
|
||
"xlr_main_pipe")
|
||
|
||
;; Integer arithmetic instructions.
|
||
--- /dev/null
|
||
+++ b/gcc/config/rs6000/e500mc.h
|
||
@@ -0,0 +1,46 @@
|
||
+/* Core target definitions for GNU compiler
|
||
+ for IBM RS/6000 PowerPC targeted to embedded ELF systems.
|
||
+ Copyright (C) 1995, 1996, 2000, 2003, 2004, 2007 Free Software Foundation, Inc.
|
||
+ Contributed by Cygnus Support.
|
||
+
|
||
+ This file is part of GCC.
|
||
+
|
||
+ GCC is free software; you can redistribute it and/or modify it
|
||
+ under the terms of the GNU General Public License as published
|
||
+ by the Free Software Foundation; either version 3, or (at your
|
||
+ option) any later version.
|
||
+
|
||
+ GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||
+ License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with GCC; see the file COPYING3. If not see
|
||
+ <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* Add -meabi to target flags. */
|
||
+#undef TARGET_DEFAULT
|
||
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_EABI)
|
||
+
|
||
+#undef TARGET_VERSION
|
||
+#define TARGET_VERSION fprintf (stderr, " (PowerPC Embedded)");
|
||
+
|
||
+#undef TARGET_OS_CPP_BUILTINS
|
||
+#define TARGET_OS_CPP_BUILTINS() \
|
||
+ do \
|
||
+ { \
|
||
+ builtin_define_std ("PPC"); \
|
||
+ builtin_define ("__embedded__"); \
|
||
+ builtin_assert ("system=embedded"); \
|
||
+ builtin_assert ("cpu=powerpc"); \
|
||
+ builtin_assert ("machine=powerpc"); \
|
||
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+#undef CC1_EXTRA_SPEC
|
||
+#define CC1_EXTRA_SPEC "-maix-struct-return"
|
||
+
|
||
+#undef ASM_DEFAULT_SPEC
|
||
+#define ASM_DEFAULT_SPEC "-mppc%{m64:64} -me500mc"
|
||
--- a/gcc/config/rs6000/eabi.asm
|
||
+++ b/gcc/config/rs6000/eabi.asm
|
||
@@ -230,7 +230,7 @@
|
||
r11 has the address of .LCTOC1 in it.
|
||
r12 has the value to add to each pointer
|
||
r13 .. r31 are unchanged */
|
||
-
|
||
+#ifdef _RELOCATABLE
|
||
FUNC_START(__eabi_convert)
|
||
cmplw 1,3,4 /* any pointers to convert? */
|
||
subf 5,3,4 /* calculate number of words to convert */
|
||
@@ -285,5 +285,5 @@
|
||
blr
|
||
|
||
FUNC_END(__eabi_uconvert)
|
||
-
|
||
+#endif
|
||
#endif
|
||
--- a/gcc/config/rs6000/eabi-ci.asm
|
||
+++ b/gcc/config/rs6000/eabi-ci.asm
|
||
@@ -98,6 +98,7 @@
|
||
/* Head of __init function used for static constructors. */
|
||
.section ".init","ax"
|
||
.align 2
|
||
+FUNC_START(_init)
|
||
FUNC_START(__init)
|
||
stwu 1,-16(1)
|
||
mflr 0
|
||
@@ -106,6 +107,7 @@
|
||
/* Head of __fini function used for static destructors. */
|
||
.section ".fini","ax"
|
||
.align 2
|
||
+FUNC_START(_fini)
|
||
FUNC_START(__fini)
|
||
stwu 1,-16(1)
|
||
mflr 0
|
||
--- a/gcc/config/rs6000/eabi.h
|
||
+++ b/gcc/config/rs6000/eabi.h
|
||
@@ -23,10 +23,6 @@
|
||
#undef TARGET_DEFAULT
|
||
#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_EABI)
|
||
|
||
-/* Invoke an initializer function to set up the GOT. */
|
||
-#define NAME__MAIN "__eabi"
|
||
-#define INVOKE__main
|
||
-
|
||
#undef TARGET_VERSION
|
||
#define TARGET_VERSION fprintf (stderr, " (PowerPC Embedded)");
|
||
|
||
@@ -42,3 +38,20 @@
|
||
TARGET_OS_SYSV_CPP_BUILTINS (); \
|
||
} \
|
||
while (0)
|
||
+
|
||
+/* Add -te500v1 and -te500v2 options for convenience in generating
|
||
+ multilibs. */
|
||
+#undef CC1_EXTRA_SPEC
|
||
+#define CC1_EXTRA_SPEC \
|
||
+ "%{te500v1: -mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe} " \
|
||
+ "%{te500v2: -mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe} " \
|
||
+ "%{te600: -mcpu=7400 -maltivec -mabi=altivec}" \
|
||
+ "%{te500mc: -mcpu=e500mc -maix-struct-return}"
|
||
+
|
||
+#undef ASM_DEFAULT_SPEC
|
||
+#define ASM_DEFAULT_SPEC \
|
||
+ "%{te500v1:-mppc -mspe -me500 ; \
|
||
+ te500v2:-mppc -mspe -me500 ; \
|
||
+ te600:-mppc -maltivec ; \
|
||
+ te500mc:-mppc -me500mc ; \
|
||
+ :-mppc%{m64:64}}"
|
||
--- a/gcc/config/rs6000/linux.h
|
||
+++ b/gcc/config/rs6000/linux.h
|
||
@@ -128,3 +128,29 @@
|
||
#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
|
||
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128
|
||
#endif
|
||
+
|
||
+/* Add -te500v1 and -te500v2 options for convenience in generating
|
||
+ multilibs. */
|
||
+#undef CC1_EXTRA_SPEC
|
||
+#define CC1_EXTRA_SPEC \
|
||
+ "%{te500v1: -mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe} " \
|
||
+ "%{te500v2: -mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe} " \
|
||
+ "%{te600: -mcpu=7400 -maltivec -mabi=altivec}" \
|
||
+ "%{te500mc: -mcpu=e500mc}"
|
||
+
|
||
+#undef ASM_DEFAULT_SPEC
|
||
+#define ASM_DEFAULT_SPEC \
|
||
+ "%{te500v1:-mppc -mspe -me500 ; \
|
||
+ te500v2:-mppc -mspe -me500 ; \
|
||
+ te600:-mppc -maltivec ; \
|
||
+ te500mc:-me500mc ; \
|
||
+ :-mppc%{m64:64}}"
|
||
+
|
||
+/* The various C libraries each have their own subdirectory. */
|
||
+#undef SYSROOT_SUFFIX_SPEC
|
||
+#define SYSROOT_SUFFIX_SPEC \
|
||
+ "%{msoft-float:/nof ; \
|
||
+ te600:/te600 ; \
|
||
+ te500v1:/te500v1 ; \
|
||
+ te500v2:/te500v2 ; \
|
||
+ te500mc:/te500mc}"
|
||
--- /dev/null
|
||
+++ b/gcc/config/rs6000/option-defaults.h
|
||
@@ -0,0 +1,64 @@
|
||
+/* Definitions of default options for config/rs6000 configurations.
|
||
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
||
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
||
+ Free Software Foundation, Inc.
|
||
+
|
||
+ This file is part of GCC.
|
||
+
|
||
+ GCC is free software; you can redistribute it and/or modify it
|
||
+ under the terms of the GNU General Public License as published
|
||
+ by the Free Software Foundation; either version 3, or (at your
|
||
+ option) any later version.
|
||
+
|
||
+ GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||
+ License for more details.
|
||
+
|
||
+ Under Section 7 of GPL version 3, you are granted additional
|
||
+ permissions described in the GCC Runtime Library Exception, version
|
||
+ 3.1, as published by the Free Software Foundation.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License and
|
||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||
+ <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* This header needs to be included after any other headers affecting
|
||
+ TARGET_DEFAULT. */
|
||
+
|
||
+#if TARGET_AIX
|
||
+#define OPT_64 "maix64"
|
||
+#define OPT_32 "maix32"
|
||
+#else
|
||
+#define OPT_64 "m64"
|
||
+#define OPT_32 "m32"
|
||
+#endif
|
||
+
|
||
+#ifndef MASK_64BIT
|
||
+#define MASK_64BIT 0
|
||
+#endif
|
||
+
|
||
+#if TARGET_DEFAULT & MASK_64BIT
|
||
+#define OPT_ARCH64 "!"OPT_32
|
||
+#define OPT_ARCH32 OPT_32
|
||
+#else
|
||
+#define OPT_ARCH64 OPT_64
|
||
+#define OPT_ARCH32 "!"OPT_64
|
||
+#endif
|
||
+
|
||
+/* Support for a compile-time default CPU, et cetera. The rules are:
|
||
+ --with-cpu is ignored if -mcpu is specified; likewise --with-cpu-32
|
||
+ and --with-cpu-64.
|
||
+ --with-tune is ignored if -mtune is specified; likewise --with-tune-32
|
||
+ and --with-tune-64.
|
||
+ --with-float is ignored if -mhard-float or -msoft-float are
|
||
+ specified. */
|
||
+#define OPTION_DEFAULT_SPECS \
|
||
+ {"cpu", "%{mcpu=*|te500mc|te500v1|te500v2|te600:;:-mcpu=%(VALUE)}" }, \
|
||
+ {"cpu_32", "%{" OPT_ARCH32 ":%{mcpu=*|te500mc|te500v1|te500v2|te600:;:-mcpu=%(VALUE)}}" }, \
|
||
+ {"cpu_64", "%{" OPT_ARCH64 ":%{mcpu=*|te500mc|te500v1|te500v2|te600:;:-mcpu=%(VALUE)}}" }, \
|
||
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
|
||
+ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \
|
||
+ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \
|
||
+ {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }
|
||
--- a/gcc/config/rs6000/paired.md
|
||
+++ b/gcc/config/rs6000/paired.md
|
||
@@ -27,7 +27,7 @@
|
||
(UNSPEC_EXTODD_V2SF 333)
|
||
])
|
||
|
||
-(define_insn "negv2sf2"
|
||
+(define_insn "paired_negv2sf2"
|
||
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
|
||
(neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))]
|
||
"TARGET_PAIRED_FLOAT"
|
||
@@ -41,7 +41,7 @@
|
||
"ps_rsqrte %0,%1"
|
||
[(set_attr "type" "fp")])
|
||
|
||
-(define_insn "absv2sf2"
|
||
+(define_insn "paired_absv2sf2"
|
||
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
|
||
(abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))]
|
||
"TARGET_PAIRED_FLOAT"
|
||
@@ -55,7 +55,7 @@
|
||
"ps_nabs %0,%1"
|
||
[(set_attr "type" "fp")])
|
||
|
||
-(define_insn "addv2sf3"
|
||
+(define_insn "paired_addv2sf3"
|
||
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
|
||
(plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
|
||
(match_operand:V2SF 2 "gpc_reg_operand" "f")))]
|
||
@@ -63,7 +63,7 @@
|
||
"ps_add %0,%1,%2"
|
||
[(set_attr "type" "fp")])
|
||
|
||
-(define_insn "subv2sf3"
|
||
+(define_insn "paired_subv2sf3"
|
||
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
|
||
(minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
|
||
(match_operand:V2SF 2 "gpc_reg_operand" "f")))]
|
||
@@ -71,7 +71,7 @@
|
||
"ps_sub %0,%1,%2"
|
||
[(set_attr "type" "fp")])
|
||
|
||
-(define_insn "mulv2sf3"
|
||
+(define_insn "paired_mulv2sf3"
|
||
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
|
||
(mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
|
||
(match_operand:V2SF 2 "gpc_reg_operand" "f")))]
|
||
@@ -86,7 +86,7 @@
|
||
"ps_res %0,%1"
|
||
[(set_attr "type" "fp")])
|
||
|
||
-(define_insn "divv2sf3"
|
||
+(define_insn "paired_divv2sf3"
|
||
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
|
||
(div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
|
||
(match_operand:V2SF 2 "gpc_reg_operand" "f")))]
|
||
--- a/gcc/config/rs6000/rs6000.c
|
||
+++ b/gcc/config/rs6000/rs6000.c
|
||
@@ -919,6 +919,7 @@
|
||
static bool rs6000_is_opaque_type (const_tree);
|
||
static rtx rs6000_dwarf_register_span (rtx);
|
||
static void rs6000_init_dwarf_reg_sizes_extra (tree);
|
||
+static int rs6000_commutative_operand_precedence (const_rtx, int);
|
||
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
|
||
static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
|
||
static rtx rs6000_tls_get_addr (void);
|
||
@@ -1194,6 +1195,10 @@
|
||
#undef TARGET_VECTOR_OPAQUE_P
|
||
#define TARGET_VECTOR_OPAQUE_P rs6000_is_opaque_type
|
||
|
||
+#undef TARGET_COMMUTATIVE_OPERAND_PRECEDENCE
|
||
+#define TARGET_COMMUTATIVE_OPERAND_PRECEDENCE \
|
||
+ rs6000_commutative_operand_precedence
|
||
+
|
||
#undef TARGET_DWARF_REGISTER_SPAN
|
||
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
|
||
|
||
@@ -4682,16 +4687,19 @@
|
||
if (TARGET_ALTIVEC)
|
||
global_regs[VSCR_REGNO] = 1;
|
||
|
||
- if (TARGET_ALTIVEC_ABI)
|
||
- {
|
||
- for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
|
||
- call_used_regs[i] = call_really_used_regs[i] = 1;
|
||
+ /* If we are not using the AltiVec ABI, pretend that the normally
|
||
+ call-saved registers are also call-used. We could use them
|
||
+ normally if we saved and restored them in the prologue; that
|
||
+ would require using the alignment padding around the register
|
||
+ save area, and some care with unwinding information. */
|
||
+ if (! TARGET_ALTIVEC_ABI)
|
||
+ for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
|
||
+ call_used_regs[i] = call_really_used_regs[i] = 1;
|
||
|
||
- /* AIX reserves VR20:31 in non-extended ABI mode. */
|
||
- if (TARGET_XCOFF)
|
||
- for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
|
||
- fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
|
||
- }
|
||
+ if (TARGET_ALTIVEC_ABI && TARGET_XCOFF)
|
||
+ /* AIX reserves VR20:31 in non-extended ABI mode. */
|
||
+ for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
|
||
+ fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
|
||
}
|
||
|
||
/* Try to output insns to set TARGET equal to the constant C if it can
|
||
@@ -7507,10 +7515,10 @@
|
||
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sums", ALTIVEC_BUILTIN_VEC_SUMS },
|
||
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_xor", ALTIVEC_BUILTIN_VEC_XOR },
|
||
|
||
- { 0, CODE_FOR_divv2sf3, "__builtin_paired_divv2sf3", PAIRED_BUILTIN_DIVV2SF3 },
|
||
- { 0, CODE_FOR_addv2sf3, "__builtin_paired_addv2sf3", PAIRED_BUILTIN_ADDV2SF3 },
|
||
- { 0, CODE_FOR_subv2sf3, "__builtin_paired_subv2sf3", PAIRED_BUILTIN_SUBV2SF3 },
|
||
- { 0, CODE_FOR_mulv2sf3, "__builtin_paired_mulv2sf3", PAIRED_BUILTIN_MULV2SF3 },
|
||
+ { 0, CODE_FOR_paired_divv2sf3, "__builtin_paired_divv2sf3", PAIRED_BUILTIN_DIVV2SF3 },
|
||
+ { 0, CODE_FOR_paired_addv2sf3, "__builtin_paired_addv2sf3", PAIRED_BUILTIN_ADDV2SF3 },
|
||
+ { 0, CODE_FOR_paired_subv2sf3, "__builtin_paired_subv2sf3", PAIRED_BUILTIN_SUBV2SF3 },
|
||
+ { 0, CODE_FOR_paired_mulv2sf3, "__builtin_paired_mulv2sf3", PAIRED_BUILTIN_MULV2SF3 },
|
||
{ 0, CODE_FOR_paired_muls0, "__builtin_paired_muls0", PAIRED_BUILTIN_MULS0 },
|
||
{ 0, CODE_FOR_paired_muls1, "__builtin_paired_muls1", PAIRED_BUILTIN_MULS1 },
|
||
{ 0, CODE_FOR_paired_merge00, "__builtin_paired_merge00", PAIRED_BUILTIN_MERGE00 },
|
||
@@ -7519,10 +7527,10 @@
|
||
{ 0, CODE_FOR_paired_merge11, "__builtin_paired_merge11", PAIRED_BUILTIN_MERGE11 },
|
||
|
||
/* Place holder, leave as first spe builtin. */
|
||
- { 0, CODE_FOR_spe_evaddw, "__builtin_spe_evaddw", SPE_BUILTIN_EVADDW },
- { 0, CODE_FOR_spe_evand, "__builtin_spe_evand", SPE_BUILTIN_EVAND },
+ { 0, CODE_FOR_addv2si3, "__builtin_spe_evaddw", SPE_BUILTIN_EVADDW },
+ { 0, CODE_FOR_andv2si3, "__builtin_spe_evand", SPE_BUILTIN_EVAND },
{ 0, CODE_FOR_spe_evandc, "__builtin_spe_evandc", SPE_BUILTIN_EVANDC },
- { 0, CODE_FOR_spe_evdivws, "__builtin_spe_evdivws", SPE_BUILTIN_EVDIVWS },
+ { 0, CODE_FOR_divv2si3, "__builtin_spe_evdivws", SPE_BUILTIN_EVDIVWS },
{ 0, CODE_FOR_spe_evdivwu, "__builtin_spe_evdivwu", SPE_BUILTIN_EVDIVWU },
{ 0, CODE_FOR_spe_eveqv, "__builtin_spe_eveqv", SPE_BUILTIN_EVEQV },
{ 0, CODE_FOR_spe_evfsadd, "__builtin_spe_evfsadd", SPE_BUILTIN_EVFSADD },
@@ -7798,7 +7806,7 @@
/* The SPE unary builtins must start with SPE_BUILTIN_EVABS and
end with SPE_BUILTIN_EVSUBFUSIAAW. */
- { 0, CODE_FOR_spe_evabs, "__builtin_spe_evabs", SPE_BUILTIN_EVABS },
+ { 0, CODE_FOR_absv2si2, "__builtin_spe_evabs", SPE_BUILTIN_EVABS },
{ 0, CODE_FOR_spe_evaddsmiaaw, "__builtin_spe_evaddsmiaaw", SPE_BUILTIN_EVADDSMIAAW },
{ 0, CODE_FOR_spe_evaddssiaaw, "__builtin_spe_evaddssiaaw", SPE_BUILTIN_EVADDSSIAAW },
{ 0, CODE_FOR_spe_evaddumiaaw, "__builtin_spe_evaddumiaaw", SPE_BUILTIN_EVADDUMIAAW },
@@ -7830,9 +7838,9 @@
/* Place-holder. Leave as last unary SPE builtin. */
{ 0, CODE_FOR_spe_evsubfusiaaw, "__builtin_spe_evsubfusiaaw", SPE_BUILTIN_EVSUBFUSIAAW },
- { 0, CODE_FOR_absv2sf2, "__builtin_paired_absv2sf2", PAIRED_BUILTIN_ABSV2SF2 },
+ { 0, CODE_FOR_paired_absv2sf2, "__builtin_paired_absv2sf2", PAIRED_BUILTIN_ABSV2SF2 },
{ 0, CODE_FOR_nabsv2sf2, "__builtin_paired_nabsv2sf2", PAIRED_BUILTIN_NABSV2SF2 },
- { 0, CODE_FOR_negv2sf2, "__builtin_paired_negv2sf2", PAIRED_BUILTIN_NEGV2SF2 },
+ { 0, CODE_FOR_paired_negv2sf2, "__builtin_paired_negv2sf2", PAIRED_BUILTIN_NEGV2SF2 },
{ 0, CODE_FOR_sqrtv2sf2, "__builtin_paired_sqrtv2sf2", PAIRED_BUILTIN_SQRTV2SF2 },
{ 0, CODE_FOR_resv2sf2, "__builtin_paired_resv2sf2", PAIRED_BUILTIN_RESV2SF2 }
};
@@ -9370,6 +9378,8 @@
|
||
static void
|
||
rs6000_init_builtins (void)
|
||
{
|
||
+ tree tdecl;
|
||
+
|
||
V2SI_type_node = build_vector_type (intSI_type_node, 2);
|
||
V2SF_type_node = build_vector_type (float_type_node, 2);
|
||
V4HI_type_node = build_vector_type (intHI_type_node, 4);
|
||
@@ -9407,60 +9417,75 @@
|
||
float_type_internal_node = float_type_node;
|
||
void_type_internal_node = void_type_node;
|
||
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__bool char"),
|
||
- bool_char_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__bool short"),
|
||
- bool_short_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__bool int"),
|
||
- bool_int_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__pixel"),
|
||
- pixel_type_node));
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__bool char"),
|
||
+ bool_char_type_node);
|
||
+ TYPE_NAME (bool_char_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__bool short"),
|
||
+ bool_short_type_node);
|
||
+ TYPE_NAME (bool_short_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__bool int"),
|
||
+ bool_int_type_node);
|
||
+ TYPE_NAME (bool_int_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__pixel"),
|
||
+ pixel_type_node);
|
||
+ TYPE_NAME (pixel_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
|
||
bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
|
||
bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
|
||
bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
|
||
pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
|
||
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector unsigned char"),
|
||
- unsigned_V16QI_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector signed char"),
|
||
- V16QI_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector __bool char"),
|
||
- bool_V16QI_type_node));
|
||
-
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector unsigned short"),
|
||
- unsigned_V8HI_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector signed short"),
|
||
- V8HI_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector __bool short"),
|
||
- bool_V8HI_type_node));
|
||
-
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector unsigned int"),
|
||
- unsigned_V4SI_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector signed int"),
|
||
- V4SI_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector __bool int"),
|
||
- bool_V4SI_type_node));
|
||
-
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector float"),
|
||
- V4SF_type_node));
|
||
- (*lang_hooks.decls.pushdecl) (build_decl (TYPE_DECL,
|
||
- get_identifier ("__vector __pixel"),
|
||
- pixel_V8HI_type_node));
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector unsigned char"),
|
||
+ unsigned_V16QI_type_node);
|
||
+ TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector signed char"),
|
||
+ V16QI_type_node);
|
||
+ TYPE_NAME (V16QI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector __bool char"),
|
||
+ bool_V16QI_type_node);
|
||
+ TYPE_NAME (bool_V16QI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector unsigned short"),
|
||
+ unsigned_V8HI_type_node);
|
||
+ TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector signed short"),
|
||
+ V8HI_type_node);
|
||
+ TYPE_NAME (V8HI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector __bool short"),
|
||
+ bool_V8HI_type_node);
|
||
+ TYPE_NAME (bool_V8HI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector unsigned int"),
|
||
+ unsigned_V4SI_type_node);
|
||
+ TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector signed int"),
|
||
+ V4SI_type_node);
|
||
+ TYPE_NAME (V4SI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector __bool int"),
|
||
+ bool_V4SI_type_node);
|
||
+ TYPE_NAME (bool_V4SI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector float"),
|
||
+ V4SF_type_node);
|
||
+ TYPE_NAME (V4SF_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
+ tdecl = build_decl (TYPE_DECL, get_identifier ("__vector __pixel"),
|
||
+ pixel_V8HI_type_node);
|
||
+ TYPE_NAME (pixel_V8HI_type_node) = tdecl;
|
||
+ (*lang_hooks.decls.pushdecl) (tdecl);
|
||
|
||
if (TARGET_PAIRED_FLOAT)
|
||
paired_init_builtins ();
|
||
@@ -15843,7 +15868,7 @@
|
||
no_global_regs_above (int first, bool gpr)
|
||
{
|
||
int i;
|
||
- for (i = first; i < gpr ? 32 : 64 ; i++)
|
||
+ for (i = first; i < (gpr ? 32 : 64); i++)
|
||
if (global_regs[i])
|
||
return false;
|
||
return true;
|
||
@@ -15869,11 +15894,11 @@
|
||
int regno = gpr ? info->first_gp_reg_save : (info->first_fp_reg_save - 32);
|
||
rtx sym;
|
||
int select = ((savep ? 1 : 0) << 2
|
||
- | (gpr
|
||
+ | (TARGET_SPE_ABI
|
||
/* On the SPE, we never have any FPRs, but we do have
|
||
32/64-bit versions of the routines. */
|
||
- ? (TARGET_SPE_ABI && info->spe_64bit_regs_used ? 1 : 0)
|
||
- : 0) << 1
|
||
+ ? (info->spe_64bit_regs_used ? 1 : 0)
|
||
+ : (gpr ? 1 : 0)) << 1
|
||
| (exitp ? 1: 0));
|
||
|
||
/* Don't generate bogus routine names. */
|
||
@@ -15908,6 +15933,7 @@
|
||
|
||
sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
|
||
= gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
|
||
+ SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
|
||
}
|
||
|
||
return sym;
|
||
@@ -16098,6 +16124,14 @@
|
||
savres_gprs_inline = savres_gprs_inline || using_multiple_p;
|
||
}
|
||
|
||
+ /* Code intended for use in shared libraries cannot be reliably linked
|
||
+ with out-of-line prologues and epilogues. */
|
||
+ if (flag_pic)
|
||
+ {
|
||
+ savres_gprs_inline = 1;
|
||
+ savres_fprs_inline = 1;
|
||
+ }
|
||
+
|
||
return (using_multiple_p
|
||
| (savres_fprs_inline << 1)
|
||
| (savres_gprs_inline << 2));
|
||
@@ -16122,7 +16156,7 @@
|
||
int using_store_multiple;
|
||
int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
|
||
&& df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
|
||
- && !call_used_regs[STATIC_CHAIN_REGNUM]);
|
||
+ && call_used_regs[STATIC_CHAIN_REGNUM]);
|
||
HOST_WIDE_INT sp_offset = 0;
|
||
|
||
if (TARGET_FIX_AND_CONTINUE)
|
||
@@ -16924,8 +16958,9 @@
|
||
|| (cfun->calls_alloca
|
||
&& !frame_pointer_needed));
|
||
restore_lr = (info->lr_save_p
|
||
- && restoring_GPRs_inline
|
||
- && restoring_FPRs_inline);
|
||
+ && (restoring_GPRs_inline
|
||
+ || (restoring_FPRs_inline
|
||
+ && info->first_fp_reg_save < 64)));
|
||
|
||
if (WORLD_SAVE_P (info))
|
||
{
|
||
@@ -17197,7 +17232,7 @@
|
||
|
||
/* Get the old lr if we saved it. If we are restoring registers
|
||
out-of-line, then the out-of-line routines can do this for us. */
|
||
- if (restore_lr)
|
||
+ if (restore_lr && restoring_GPRs_inline)
|
||
{
|
||
rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx,
|
||
info->lr_save_offset + sp_offset);
|
||
@@ -17216,7 +17251,7 @@
|
||
}
|
||
|
||
/* Set LR here to try to overlap restores below. */
|
||
- if (restore_lr)
|
||
+ if (restore_lr && restoring_GPRs_inline)
|
||
emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO),
|
||
gen_rtx_REG (Pmode, 0));
|
||
|
||
@@ -17396,6 +17431,18 @@
|
||
}
|
||
}
|
||
|
||
+ if (restore_lr && !restoring_GPRs_inline)
|
||
+ {
|
||
+ rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx,
|
||
+ info->lr_save_offset + sp_offset);
|
||
+
|
||
+ emit_move_insn (gen_rtx_REG (Pmode, 0), mem);
|
||
+ }
|
||
+
|
||
+ if (restore_lr && !restoring_GPRs_inline)
|
||
+ emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO),
|
||
+ gen_rtx_REG (Pmode, 0));
|
||
+
|
||
/* Restore fpr's if we need to do it without calling a function. */
|
||
if (restoring_FPRs_inline)
|
||
for (i = 0; i < 64 - info->first_fp_reg_save; i++)
|
||
@@ -22163,6 +22210,30 @@
|
||
return 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
|
||
}
|
||
|
||
+/* Return a value indicating whether OP, an operand of a commutative
|
||
+ operation, is preferred as the first or second operand. The higher
|
||
+ the value, the stronger the preference for being the first operand.
|
||
+ We use negative values to indicate a preference for the first operand
|
||
+ and positive values for the second operand.
|
||
+ VALUE is the default precedence for OP; see rtlanal.c:
|
||
+ commutative_operand_precedence. */
|
||
+
|
||
+static int
|
||
+rs6000_commutative_operand_precedence (const_rtx op, int value)
|
||
+{
|
||
+ /* Prefer pointer objects over non pointer objects.
|
||
+ For rationale see PR28690. */
|
||
+ if (GET_RTX_CLASS (GET_CODE (op)) == RTX_OBJ
|
||
+ && ((REG_P (op) && REG_POINTER (op))
|
||
+ || (MEM_P (op) && MEM_POINTER (op))))
|
||
+ /* value = -1 */;
|
||
+ else
|
||
+ /* value = -2 */
|
||
+ --value;
|
||
+
|
||
+ return value;
|
||
+}
|
||
+
|
||
/* Returns a code for a target-specific builtin that implements
|
||
reciprocal of the function, or NULL_TREE if not available. */
|
||
|
||
@@ -22686,12 +22757,16 @@
|
||
static rtx
|
||
rs6000_dwarf_register_span (rtx reg)
|
||
{
|
||
- unsigned regno;
|
||
+ rtx parts[8];
|
||
+ int i, words;
|
||
+ unsigned regno = REGNO (reg);
|
||
+ enum machine_mode mode = GET_MODE (reg);
|
||
|
||
if (TARGET_SPE
|
||
+ && regno < 32
|
||
&& (SPE_VECTOR_MODE (GET_MODE (reg))
|
||
- || (TARGET_E500_DOUBLE
|
||
- && (GET_MODE (reg) == DFmode || GET_MODE (reg) == DDmode))))
|
||
+ || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
|
||
+ && mode != SFmode && mode != SDmode && mode != SCmode)))
|
||
;
|
||
else
|
||
return NULL_RTX;
|
||
@@ -22701,15 +22776,23 @@
|
||
/* The duality of the SPE register size wreaks all kinds of havoc.
|
||
This is a way of distinguishing r0 in 32-bits from r0 in
|
||
64-bits. */
|
||
- return
|
||
- gen_rtx_PARALLEL (VOIDmode,
|
||
- BYTES_BIG_ENDIAN
|
||
- ? gen_rtvec (2,
|
||
- gen_rtx_REG (SImode, regno + 1200),
|
||
- gen_rtx_REG (SImode, regno))
|
||
- : gen_rtvec (2,
|
||
- gen_rtx_REG (SImode, regno),
|
||
- gen_rtx_REG (SImode, regno + 1200)));
|
||
+ words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
|
||
+ gcc_assert (words <= 4);
|
||
+ for (i = 0; i < words; i++, regno++)
|
||
+ {
|
||
+ if (BYTES_BIG_ENDIAN)
|
||
+ {
|
||
+ parts[2 * i] = gen_rtx_REG (SImode, regno + 1200);
|
||
+ parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ parts[2 * i] = gen_rtx_REG (SImode, regno);
|
||
+ parts[2 * i + 1] = gen_rtx_REG (SImode, regno + 1200);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
|
||
}
|
||
|
||
/* Fill in sizes for SPE register high parts in table used by unwinder. */
|
||
--- a/gcc/config/rs6000/rs6000.h
|
||
+++ b/gcc/config/rs6000/rs6000.h
|
||
@@ -368,16 +368,6 @@
|
||
previous_group
|
||
};
|
||
|
||
-/* Support for a compile-time default CPU, et cetera. The rules are:
|
||
- --with-cpu is ignored if -mcpu is specified.
|
||
- --with-tune is ignored if -mtune is specified.
|
||
- --with-float is ignored if -mhard-float or -msoft-float are
|
||
- specified. */
|
||
-#define OPTION_DEFAULT_SPECS \
|
||
- {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \
|
||
- {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
|
||
- {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }
|
||
-
|
||
/* rs6000_select[0] is reserved for the default cpu defined via --with-cpu */
|
||
struct rs6000_cpu_select
|
||
{
|
||
@@ -794,8 +784,8 @@
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \
|
||
/* AltiVec registers. */ \
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
1, 1 \
|
||
, 1, 1, 1 \
|
||
}
|
||
@@ -813,8 +803,8 @@
|
||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \
|
||
/* AltiVec registers. */ \
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||
0, 0 \
|
||
, 0, 0, 0 \
|
||
}
|
||
--- a/gcc/config/rs6000/rs6000.md
|
||
+++ b/gcc/config/rs6000/rs6000.md
|
||
@@ -14703,9 +14703,9 @@
|
||
[(match_parallel 0 "any_parallel_operand"
|
||
[(clobber (reg:P 65))
|
||
(use (match_operand:P 1 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 2 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:P 3 "memory_operand" "=m")
|
||
- (match_operand:P 4 "gpc_reg_operand" "r"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:P 2 "memory_operand" "=m")
|
||
+ (match_operand:P 3 "gpc_reg_operand" "r"))])]
|
||
""
|
||
"bl %z1"
|
||
[(set_attr "type" "branch")
|
||
@@ -14715,9 +14715,9 @@
|
||
[(match_parallel 0 "any_parallel_operand"
|
||
[(clobber (reg:P 65))
|
||
(use (match_operand:P 1 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 2 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:DF 3 "memory_operand" "=m")
|
||
- (match_operand:DF 4 "gpc_reg_operand" "f"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:DF 2 "memory_operand" "=m")
|
||
+ (match_operand:DF 3 "gpc_reg_operand" "f"))])]
|
||
""
|
||
"bl %z1"
|
||
[(set_attr "type" "branch")
|
||
@@ -14810,9 +14810,9 @@
|
||
[(match_parallel 0 "any_parallel_operand"
|
||
[(clobber (match_operand:P 1 "register_operand" "=l"))
|
||
(use (match_operand:P 2 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 3 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:P 4 "gpc_reg_operand" "=r")
|
||
- (match_operand:P 5 "memory_operand" "m"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
|
||
+ (match_operand:P 4 "memory_operand" "m"))])]
|
||
""
|
||
"bl %z2"
|
||
[(set_attr "type" "branch")
|
||
@@ -14823,9 +14823,9 @@
|
||
[(return)
|
||
(clobber (match_operand:P 1 "register_operand" "=l"))
|
||
(use (match_operand:P 2 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 3 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:P 4 "gpc_reg_operand" "=r")
|
||
- (match_operand:P 5 "memory_operand" "m"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
|
||
+ (match_operand:P 4 "memory_operand" "m"))])]
|
||
""
|
||
"b %z2"
|
||
[(set_attr "type" "branch")
|
||
@@ -14836,9 +14836,9 @@
|
||
[(return)
|
||
(clobber (match_operand:P 1 "register_operand" "=l"))
|
||
(use (match_operand:P 2 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 3 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:DF 4 "gpc_reg_operand" "=f")
|
||
- (match_operand:DF 5 "memory_operand" "m"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:DF 3 "gpc_reg_operand" "=f")
|
||
+ (match_operand:DF 4 "memory_operand" "m"))])]
|
||
""
|
||
"b %z2"
|
||
[(set_attr "type" "branch")
|
||
@@ -14889,6 +14889,120 @@
|
||
}"
|
||
[(set_attr "type" "load")])
|
||
|
||
+;;; Expanders for vector insn patterns shared between the SPE and TARGET_PAIRED systems.
|
||
+
|
||
+(define_expand "absv2sf2"
|
||
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
|
||
+ (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))]
|
||
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
|
||
+ "
|
||
+{
|
||
+ if (TARGET_SPE)
|
||
+ {
|
||
+ /* We need to make a note that we clobber SPEFSCR. */
|
||
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
|
||
+ gen_rtx_ABS (V2SFmode, operands[1])));
|
||
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)));
|
||
+ DONE;
|
||
+ }
|
||
+}")
|
||
+
|
||
+(define_expand "negv2sf2"
|
||
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
|
||
+ (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))]
|
||
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
|
||
+ "
|
||
+{
|
||
+ if (TARGET_SPE)
|
||
+ {
|
||
+ /* We need to make a note that we clobber SPEFSCR. */
|
||
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
|
||
+ gen_rtx_NEG (V2SFmode, operands[1])));
|
||
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO)));
|
||
+ DONE;
|
||
+ }
|
||
+}")
|
||
+
|
||
+(define_expand "addv2sf3"
|
||
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
|
||
+ (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
|
||
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
|
||
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
|
||
+ "
|
||
+{
|
||
+ if (TARGET_SPE)
|
||
+ {
|
||
+ /* We need to make a note that we clobber SPEFSCR. */
|
||
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
|
||
+
|
||
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
|
||
+ gen_rtx_PLUS (V2SFmode, operands[1], operands[2]));
|
||
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
|
||
+ emit_insn (par);
|
||
+ DONE;
|
||
+ }
|
||
+}")
|
||
+
|
||
+(define_expand "subv2sf3"
|
||
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
|
||
+ (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
|
||
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
|
||
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
|
||
+ "
|
||
+{
|
||
+ if (TARGET_SPE)
|
||
+ {
|
||
+ /* We need to make a note that we clobber SPEFSCR. */
|
||
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
|
||
+
|
||
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
|
||
+ gen_rtx_MINUS (V2SFmode, operands[1], operands[2]));
|
||
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
|
||
+ emit_insn (par);
|
||
+ DONE;
|
||
+ }
|
||
+}")
|
||
+
|
||
+(define_expand "mulv2sf3"
|
||
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
|
||
+ (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
|
||
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
|
||
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
|
||
+ "
|
||
+{
|
||
+ if (TARGET_SPE)
|
||
+ {
|
||
+ /* We need to make a note that we clobber SPEFSCR. */
|
||
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
|
||
+
|
||
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
|
||
+ gen_rtx_MULT (V2SFmode, operands[1], operands[2]));
|
||
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
|
||
+ emit_insn (par);
|
||
+ DONE;
|
||
+ }
|
||
+}")
|
||
+
|
||
+(define_expand "divv2sf3"
|
||
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
|
||
+ (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
|
||
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
|
||
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
|
||
+ "
|
||
+{
|
||
+ if (TARGET_SPE)
|
||
+ {
|
||
+ /* We need to make a note that we clobber SPEFSCR. */
|
||
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
|
||
+
|
||
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
|
||
+ gen_rtx_DIV (V2SFmode, operands[1], operands[2]));
|
||
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
|
||
+ emit_insn (par);
|
||
+ DONE;
|
||
+ }
|
||
+}")
|
||
+
|
||
|
||
(include "sync.md")
|
||
(include "altivec.md")
|
||
--- a/gcc/config/rs6000/spe.md
|
||
+++ b/gcc/config/rs6000/spe.md
|
||
@@ -164,7 +164,7 @@
|
||
|
||
;; SPE SIMD instructions
|
||
|
||
-(define_insn "spe_evabs"
|
||
+(define_insn "absv2si2"
|
||
[(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
|
||
(abs:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")))]
|
||
"TARGET_SPE"
|
||
@@ -181,7 +181,7 @@
|
||
[(set_attr "type" "vecsimple")
|
||
(set_attr "length" "4")])
|
||
|
||
-(define_insn "spe_evand"
|
||
+(define_insn "andv2si3"
|
||
[(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
|
||
(and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
|
||
(match_operand:V2SI 2 "gpc_reg_operand" "r")))]
|
||
@@ -1898,7 +1898,7 @@
|
||
[(set_attr "type" "veccomplex")
|
||
(set_attr "length" "4")])
|
||
|
||
-(define_insn "spe_evaddw"
|
||
+(define_insn "addv2si3"
|
||
[(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
|
||
(plus:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
|
||
(match_operand:V2SI 2 "gpc_reg_operand" "r")))]
|
||
@@ -2028,7 +2028,7 @@
|
||
[(set_attr "type" "veccomplex")
|
||
(set_attr "length" "4")])
|
||
|
||
-(define_insn "spe_evdivws"
|
||
+(define_insn "divv2si3"
|
||
[(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
|
||
(div:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
|
||
(match_operand:V2SI 2 "gpc_reg_operand" "r")))
|
||
@@ -3156,9 +3156,9 @@
|
||
[(match_parallel 0 "any_parallel_operand"
|
||
[(clobber (reg:P 65))
|
||
(use (match_operand:P 1 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 2 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:V2SI 3 "memory_operand" "=m")
|
||
- (match_operand:V2SI 4 "gpc_reg_operand" "r"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:V2SI 2 "memory_operand" "=m")
|
||
+ (match_operand:V2SI 3 "gpc_reg_operand" "r"))])]
|
||
"TARGET_SPE_ABI"
|
||
"bl %z1"
|
||
[(set_attr "type" "branch")
|
||
@@ -3168,9 +3168,9 @@
|
||
[(match_parallel 0 "any_parallel_operand"
|
||
[(clobber (reg:P 65))
|
||
(use (match_operand:P 1 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 2 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:V2SI 3 "gpc_reg_operand" "=r")
|
||
- (match_operand:V2SI 4 "memory_operand" "m"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:V2SI 2 "gpc_reg_operand" "=r")
|
||
+ (match_operand:V2SI 3 "memory_operand" "m"))])]
|
||
"TARGET_SPE_ABI"
|
||
"bl %z1"
|
||
[(set_attr "type" "branch")
|
||
@@ -3181,9 +3181,9 @@
|
||
[(return)
|
||
(clobber (reg:P 65))
|
||
(use (match_operand:P 1 "symbol_ref_operand" "s"))
|
||
- (use (match_operand:P 2 "gpc_reg_operand" "r"))
|
||
- (set (match_operand:V2SI 3 "gpc_reg_operand" "=r")
|
||
- (match_operand:V2SI 4 "memory_operand" "m"))])]
|
||
+ (use (reg:P 11))
|
||
+ (set (match_operand:V2SI 2 "gpc_reg_operand" "=r")
|
||
+ (match_operand:V2SI 3 "memory_operand" "m"))])]
|
||
"TARGET_SPE_ABI"
|
||
"b %z1"
|
||
[(set_attr "type" "branch")
|
||
--- a/gcc/config/rs6000/sysv4.h
|
||
+++ b/gcc/config/rs6000/sysv4.h
|
||
@@ -619,6 +619,9 @@
|
||
#define CC1_SECURE_PLT_DEFAULT_SPEC ""
|
||
#endif
|
||
|
||
+#undef CC1_EXTRA_SPEC
|
||
+#define CC1_EXTRA_SPEC ""
|
||
+
|
||
/* Pass -G xxx to the compiler and set correct endian mode. */
|
||
#define CC1_SPEC "%{G*} %(cc1_cpu) \
|
||
%{mlittle|mlittle-endian: %(cc1_endian_little); \
|
||
@@ -643,7 +646,7 @@
|
||
%{msdata: -msdata=default} \
|
||
%{mno-sdata: -msdata=none} \
|
||
%{!mbss-plt: %{!msecure-plt: %(cc1_secure_plt_default)}} \
|
||
-%{profile: -p}"
|
||
+%{profile: -p}" CC1_EXTRA_SPEC
|
||
|
||
/* Don't put -Y P,<path> for cross compilers. */
|
||
#ifndef CROSS_DIRECTORY_STRUCTURE
|
||
@@ -843,15 +846,15 @@
|
||
#define CPP_OS_MVME_SPEC ""
|
||
|
||
/* PowerPC simulator based on netbsd system calls support. */
|
||
-#define LIB_SIM_SPEC "--start-group -lsim -lc --end-group"
|
||
+#define LIB_SIM_SPEC LIB_DEFAULT_SPEC
|
||
|
||
-#define STARTFILE_SIM_SPEC "ecrti.o%s sim-crt0.o%s crtbegin.o%s"
|
||
+#define STARTFILE_SIM_SPEC "ecrti.o%s crtbegin.o%s"
|
||
|
||
-#define ENDFILE_SIM_SPEC "crtend.o%s ecrtn.o%s"
|
||
+#define ENDFILE_SIM_SPEC "crtend.o%s ecrtn.o%s -Tsim-hosted.ld"
|
||
|
||
#define LINK_START_SIM_SPEC ""
|
||
|
||
-#define LINK_OS_SIM_SPEC "-m elf32ppcsim"
|
||
+#define LINK_OS_SIM_SPEC ""
|
||
|
||
#define CPP_OS_SIM_SPEC ""
|
||
|
||
--- a/gcc/config/rs6000/t-ppccomm
|
||
+++ b/gcc/config/rs6000/t-ppccomm
|
||
@@ -3,10 +3,23 @@
|
||
LIB2FUNCS_EXTRA += tramp.S $(srcdir)/config/rs6000/darwin-ldouble.c
|
||
|
||
# These can't end up in shared libgcc
|
||
-LIB2FUNCS_STATIC_EXTRA = eabi.S
|
||
-
|
||
-eabi.S: $(srcdir)/config/rs6000/eabi.asm
|
||
- cat $(srcdir)/config/rs6000/eabi.asm > eabi.S
|
||
+LIB2FUNCS_STATIC_EXTRA = \
|
||
+ crtsavfpr.S crtresfpr.S \
|
||
+ crtsavgpr.S crtresgpr.S \
|
||
+ crtresxfpr.S crtresxgpr.S \
|
||
+ e500crtres32gpr.S \
|
||
+ e500crtres64gpr.S \
|
||
+ e500crtres64gprctr.S \
|
||
+ e500crtrest32gpr.S \
|
||
+ e500crtrest64gpr.S \
|
||
+ e500crtresx32gpr.S \
|
||
+ e500crtresx64gpr.S \
|
||
+ e500crtsav32gpr.S \
|
||
+ e500crtsav64gpr.S \
|
||
+ e500crtsav64gprctr.S \
|
||
+ e500crtsavg32gpr.S \
|
||
+ e500crtsavg64gpr.S \
|
||
+ e500crtsavg64gprctr.S
|
||
|
||
tramp.S: $(srcdir)/config/rs6000/tramp.asm
|
||
cat $(srcdir)/config/rs6000/tramp.asm > tramp.S
|
||
@@ -36,6 +49,63 @@
|
||
ncrtn.S: $(srcdir)/config/rs6000/sol-cn.asm
|
||
cat $(srcdir)/config/rs6000/sol-cn.asm >ncrtn.S
|
||
|
||
+crtsavfpr.S: $(srcdir)/config/rs6000/crtsavfpr.asm
|
||
+ cat $(srcdir)/config/rs6000/crtsavfpr.asm >crtsavfpr.S
|
||
+
|
||
+crtresfpr.S: $(srcdir)/config/rs6000/crtresfpr.asm
|
||
+ cat $(srcdir)/config/rs6000/crtresfpr.asm >crtresfpr.S
|
||
+
|
||
+crtsavgpr.S: $(srcdir)/config/rs6000/crtsavgpr.asm
|
||
+ cat $(srcdir)/config/rs6000/crtsavgpr.asm >crtsavgpr.S
|
||
+
|
||
+crtresgpr.S: $(srcdir)/config/rs6000/crtresgpr.asm
|
||
+ cat $(srcdir)/config/rs6000/crtresgpr.asm >crtresgpr.S
|
||
+
|
||
+crtresxfpr.S: $(srcdir)/config/rs6000/crtresxfpr.asm
|
||
+ cat $(srcdir)/config/rs6000/crtresxfpr.asm >crtresxfpr.S
|
||
+
|
||
+crtresxgpr.S: $(srcdir)/config/rs6000/crtresxgpr.asm
|
||
+ cat $(srcdir)/config/rs6000/crtresxgpr.asm >crtresxgpr.S
|
||
+
|
||
+e500crtres32gpr.S: $(srcdir)/config/rs6000/e500crtres32gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtres32gpr.asm >e500crtres32gpr.S
|
||
+
|
||
+e500crtres64gpr.S: $(srcdir)/config/rs6000/e500crtres64gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtres64gpr.asm >e500crtres64gpr.S
|
||
+
|
||
+e500crtres64gprctr.S: $(srcdir)/config/rs6000/e500crtres64gprctr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtres64gprctr.asm >e500crtres64gprctr.S
|
||
+
|
||
+e500crtrest32gpr.S: $(srcdir)/config/rs6000/e500crtrest32gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtrest32gpr.asm >e500crtrest32gpr.S
|
||
+
|
||
+e500crtrest64gpr.S: $(srcdir)/config/rs6000/e500crtrest64gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtrest64gpr.asm >e500crtrest64gpr.S
|
||
+
|
||
+e500crtresx32gpr.S: $(srcdir)/config/rs6000/e500crtresx32gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtresx32gpr.asm >e500crtresx32gpr.S
|
||
+
|
||
+e500crtresx64gpr.S: $(srcdir)/config/rs6000/e500crtresx64gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtresx64gpr.asm >e500crtresx64gpr.S
|
||
+
|
||
+e500crtsav32gpr.S: $(srcdir)/config/rs6000/e500crtsav32gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtsav32gpr.asm >e500crtsav32gpr.S
|
||
+
|
||
+e500crtsav64gpr.S: $(srcdir)/config/rs6000/e500crtsav64gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtsav64gpr.asm >e500crtsav64gpr.S
|
||
+
|
||
+e500crtsav64gprctr.S: $(srcdir)/config/rs6000/e500crtsav64gprctr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtsav64gprctr.asm >e500crtsav64gprctr.S
|
||
+
|
||
+e500crtsavg32gpr.S: $(srcdir)/config/rs6000/e500crtsavg32gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtsavg32gpr.asm >e500crtsavg32gpr.S
|
||
+
|
||
+e500crtsavg64gpr.S: $(srcdir)/config/rs6000/e500crtsavg64gpr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtsavg64gpr.asm >e500crtsavg64gpr.S
|
||
+
|
||
+e500crtsavg64gprctr.S: $(srcdir)/config/rs6000/e500crtsavg64gprctr.asm
|
||
+ cat $(srcdir)/config/rs6000/e500crtsavg64gprctr.asm >e500crtsavg64gprctr.S
|
||
+
|
||
# Build multiple copies of ?crt{i,n}.o, one for each target switch.
|
||
$(T)ecrti$(objext): ecrti.S
|
||
$(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c ecrti.S -o $(T)ecrti$(objext)
|
||
@@ -49,6 +119,63 @@
|
||
$(T)ncrtn$(objext): ncrtn.S
|
||
$(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c ncrtn.S -o $(T)ncrtn$(objext)
|
||
|
||
+$(T)crtsavfpr$(objext): crtsavfpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtsavfpr.S -o $(T)crtsavfpr$(objext)
|
||
+
|
||
+$(T)crtresfpr$(objext): crtresfpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresfpr.S -o $(T)crtresfpr$(objext)
|
||
+
|
||
+$(T)crtsavgpr$(objext): crtsavgpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtsavgpr.S -o $(T)crtsavgpr$(objext)
|
||
+
|
||
+$(T)crtresgpr$(objext): crtresgpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresgpr.S -o $(T)crtresgpr$(objext)
|
||
+
|
||
+$(T)crtresxfpr$(objext): crtresxfpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresxfpr.S -o $(T)crtresxfpr$(objext)
|
||
+
|
||
+$(T)crtresxgpr$(objext): crtresxgpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresxgpr.S -o $(T)crtresxgpr$(objext)
|
||
+
|
||
+$(T)e500crtres32gpr$(objext): e500crtres32gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtres32gpr.S -o $(T)e500crtres32gpr$(objext)
|
||
+
|
||
+$(T)e500crtres64gpr$(objext): e500crtres64gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtres64gpr.S -o $(T)e500crtres64gpr$(objext)
|
||
+
|
||
+$(T)e500crtres64gprctr$(objext): e500crtres64gprctr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtres64gprctr.S -o $(T)e500crtres64gprctr$(objext)
|
||
+
|
||
+$(T)e500crtrest32gpr$(objext): e500crtrest32gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtrest32gpr.S -o $(T)e500crtrest32gpr$(objext)
|
||
+
|
||
+$(T)e500crtrest64gpr$(objext): e500crtrest64gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtrest64gpr.S -o $(T)e500crtrest64gpr$(objext)
|
||
+
|
||
+$(T)e500crtresx32gpr$(objext): e500crtresx32gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtresx32gpr.S -o $(T)e500crtresx32gpr$(objext)
|
||
+
|
||
+$(T)e500crtresx64gpr$(objext): e500crtresx64gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtresx64gpr.S -o $(T)e500crtresx64gpr$(objext)
|
||
+
|
||
+$(T)e500crtsav32gpr$(objext): e500crtsav32gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtsav32gpr.S -o $(T)e500crtsav32gpr$(objext)
|
||
+
|
||
+$(T)e500crtsav64gpr$(objext): e500crtsav64gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtsav64gpr.S -o $(T)e500crtsav64gpr$(objext)
|
||
+
|
||
+$(T)e500crtsav64gprctr$(objext): e500crtsav64gprctr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtsav64gprctr.S -o $(T)e500crtsav64gprctr$(objext)
|
||
+
|
||
+$(T)e500crtsavg32gpr$(objext): e500crtsavg32gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtsavg32gpr.S -o $(T)e500crtsavg32gpr$(objext)
|
||
+
|
||
+$(T)e500crtsavg64gpr$(objext): e500crtsavg64gpr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtsavg64gpr.S -o $(T)e500crtsavg64gpr$(objext)
|
||
+
|
||
+$(T)e500crtsavg64gprctr$(objext): e500crtsavg64gprctr.S
|
||
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c e500crtsavg64gprctr.S -o $(T)e500crtsavg64gprctr$(objext)
|
||
+
|
||
# It is important that crtbegin.o, etc., aren't surprised by stuff in .sdata.
|
||
CRTSTUFF_T_CFLAGS = -msdata=none
|
||
# Make sure crt*.o are built with -fPIC even if configured with
|
||
--- /dev/null
|
||
+++ b/gcc/config/rs6000/t-ppc-e500mc
|
||
@@ -0,0 +1,12 @@
|
||
+# Multilibs for powerpc embedded ELF targets.
|
||
+
|
||
+MULTILIB_OPTIONS =
|
||
+
|
||
+MULTILIB_DIRNAMES =
|
||
+
|
||
+MULTILIB_EXCEPTIONS =
|
||
+
|
||
+MULTILIB_EXTRA_OPTS = mno-eabi mstrict-align
|
||
+
|
||
+MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} \
|
||
+ ${MULTILIB_MATCHES_ENDIAN}
|
||
--- a/gcc/config/sh/lib1funcs.asm
|
||
+++ b/gcc/config/sh/lib1funcs.asm
|
||
@@ -2080,8 +2080,9 @@
|
||
GLOBAL(ic_invalidate):
|
||
ocbwb @r4
|
||
synco
|
||
- rts
|
||
icbi @r4
|
||
+ rts
|
||
+ nop
|
||
ENDFUNC(GLOBAL(ic_invalidate))
|
||
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
|
||
/* For system code, we use ic_invalidate_line_i, but user code
|
||
@@ -2147,8 +2148,10 @@
|
||
GLOBAL(ic_invalidate_array):
|
||
add r1,r4
|
||
synco
|
||
- rts
|
||
icbi @r4
|
||
+ rts
|
||
+ nop
|
||
+ .align 2
|
||
.long 0
|
||
ENDFUNC(GLOBAL(ic_invalidate_array))
|
||
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
|
||
--- a/gcc/config/sh/linux-unwind.h
|
||
+++ b/gcc/config/sh/linux-unwind.h
|
||
@@ -24,7 +24,10 @@
|
||
|
||
|
||
/* Do code reading to identify a signal frame, and set the frame
|
||
- state data appropriately. See unwind-dw2.c for the structs. */
|
||
+ state data appropriately. See unwind-dw2.c for the structs.
|
||
+ Don't use this at all if inhibit_libc is used. */
|
||
+
|
||
+#ifndef inhibit_libc
|
||
|
||
#include <signal.h>
|
||
#include <sys/ucontext.h>
|
||
@@ -248,3 +251,5 @@
|
||
return _URC_NO_REASON;
|
||
}
|
||
#endif /* defined (__SH5__) */
|
||
+
|
||
+#endif /* inhibit_libc */
|
||
--- a/gcc/config/sh/sh.h
|
||
+++ b/gcc/config/sh/sh.h
|
||
@@ -712,8 +712,9 @@
|
||
/* Never run scheduling before reload, since that can \
|
||
break global alloc, and generates slower code anyway due \
|
||
to the pressure on R0. */ \
|
||
- /* Enable sched1 for SH4; ready queue will be reordered by \
|
||
- the target hooks when pressure is high. We can not do this for \
|
||
+ /* Enable sched1 for SH4 if the user explicitly requests. \
|
||
+ When sched1 is enabled, the ready queue will be reordered by \
|
||
+ the target hooks if pressure is high. We can not do this for \
|
||
PIC, SH3 and lower as they give spill failures for R0. */ \
|
||
if (!TARGET_HARD_SH4 || flag_pic) \
|
||
flag_schedule_insns = 0; \
|
||
@@ -728,6 +729,8 @@
|
||
warning (0, "ignoring -fschedule-insns because of exception handling bug"); \
|
||
flag_schedule_insns = 0; \
|
||
} \
|
||
+ else if (flag_schedule_insns == 2) \
|
||
+ flag_schedule_insns = 0; \
|
||
} \
|
||
\
|
||
if (align_loops == 0) \
|
||
--- a/gcc/config/sol2.h
|
||
+++ b/gcc/config/sol2.h
|
||
@@ -123,12 +123,12 @@
|
||
%{YP,*} \
|
||
%{R*} \
|
||
%{compat-bsd: \
|
||
- %{!YP,*:%{p|pg:-Y P,/usr/ucblib:/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \
|
||
- %{!p:%{!pg:-Y P,/usr/ucblib:/usr/ccs/lib:/usr/lib}}} \
|
||
- -R /usr/ucblib} \
|
||
+ %{!YP,*:%{p|pg:-Y P,%R/usr/ucblib:%R/usr/ccs/lib/libp:%R/usr/lib/libp:%R/usr/ccs/lib:%R/usr/lib} \
|
||
+ %{!p:%{!pg:-Y P,%R/usr/ucblib:%R/usr/ccs/lib:%R/usr/lib}}} \
|
||
+ -R %R/usr/ucblib} \
|
||
%{!compat-bsd: \
|
||
- %{!YP,*:%{p|pg:-Y P,/usr/ccs/lib/libp:/usr/lib/libp:/usr/ccs/lib:/usr/lib} \
|
||
- %{!p:%{!pg:-Y P,/usr/ccs/lib:/usr/lib}}}}"
|
||
+ %{!YP,*:%{p|pg:-Y P,%R/usr/ccs/lib/libp:%R/usr/lib/libp:%R/usr/ccs/lib:%R/usr/lib} \
|
||
+ %{!p:%{!pg:-Y P,%R/usr/ccs/lib:%R/usr/lib}}}}"
|
||
|
||
#undef LINK_ARCH32_SPEC
|
||
#define LINK_ARCH32_SPEC LINK_ARCH32_SPEC_BASE
|
||
--- a/gcc/config/sparc/linux64.h
|
||
+++ b/gcc/config/sparc/linux64.h
|
||
@@ -40,10 +40,15 @@
|
||
in a Medium/Low code model environment. */
|
||
|
||
#undef TARGET_DEFAULT
|
||
+#ifdef BIARCH_32BIT_DEFAULT
|
||
+#define TARGET_DEFAULT \
|
||
+ (MASK_APP_REGS + MASK_FPU)
|
||
+#else
|
||
#define TARGET_DEFAULT \
|
||
(MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ \
|
||
+ MASK_STACK_BIAS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128)
|
||
#endif
|
||
+#endif
|
||
|
||
/* This must be v9a not just v9 because by default we enable
|
||
-mvis. */
|
||
--- a/gcc/config/sparc/sol2-bi.h
|
||
+++ b/gcc/config/sparc/sol2-bi.h
|
||
@@ -172,12 +172,12 @@
|
||
%{YP,*} \
|
||
%{R*} \
|
||
%{compat-bsd: \
|
||
- %{!YP,*:%{p|pg:-Y P,/usr/ucblib/sparcv9:/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
|
||
- %{!p:%{!pg:-Y P,/usr/ucblib/sparcv9:/usr/lib/sparcv9}}} \
|
||
- -R /usr/ucblib/sparcv9} \
|
||
+ %{!YP,*:%{p|pg:-Y P,%R/usr/ucblib/sparcv9:%R/usr/lib/libp/sparcv9:%R/usr/lib/sparcv9} \
|
||
+ %{!p:%{!pg:-Y P,%R/usr/ucblib/sparcv9:%R/usr/lib/sparcv9}}} \
|
||
+ -R %R/usr/ucblib/sparcv9} \
|
||
%{!compat-bsd: \
|
||
- %{!YP,*:%{p|pg:-Y P,/usr/lib/libp/sparcv9:/usr/lib/sparcv9} \
|
||
- %{!p:%{!pg:-Y P,/usr/lib/sparcv9}}}}"
|
||
+ %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/sparcv9:%R/usr/lib/sparcv9} \
|
||
+ %{!p:%{!pg:-Y P,%R/usr/lib/sparcv9}}}}"
|
||
|
||
#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE
|
||
|
||
--- a/gcc/config/sparc/sparc.c
|
||
+++ b/gcc/config/sparc/sparc.c
|
||
@@ -6120,7 +6120,7 @@
|
||
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
|
||
{
|
||
const char *qpfunc;
|
||
- rtx slot0, slot1, result, tem, tem2;
|
||
+ rtx slot0, slot1, result, tem, tem2, libfunc;
|
||
enum machine_mode mode;
|
||
enum rtx_code new_comparison;
|
||
|
||
@@ -6183,7 +6183,8 @@
|
||
emit_move_insn (slot1, y);
|
||
}
|
||
|
||
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
|
||
+ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
|
||
+ emit_library_call (libfunc, LCT_NORMAL,
|
||
DImode, 2,
|
||
XEXP (slot0, 0), Pmode,
|
||
XEXP (slot1, 0), Pmode);
|
||
@@ -6191,7 +6192,8 @@
|
||
}
|
||
else
|
||
{
|
||
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
|
||
+ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
|
||
+ emit_library_call (libfunc, LCT_NORMAL,
|
||
SImode, 2,
|
||
x, TFmode, y, TFmode);
|
||
mode = SImode;
|
||
@@ -6202,7 +6204,7 @@
|
||
register so reload doesn't clobber the value if it needs
|
||
the return register for a spill reg. */
|
||
result = gen_reg_rtx (mode);
|
||
- emit_move_insn (result, hard_libcall_value (mode));
|
||
+ emit_move_insn (result, hard_libcall_value (mode, libfunc));
|
||
|
||
switch (comparison)
|
||
{
|
||
--- a/gcc/config/spu/spu.h
|
||
+++ b/gcc/config/spu/spu.h
|
||
@@ -270,7 +270,8 @@
|
||
|
||
#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LINK_REGISTER_REGNUM)
|
||
|
||
-#define ARG_POINTER_CFA_OFFSET(FNDECL) (-STACK_POINTER_OFFSET)
|
||
+#define ARG_POINTER_CFA_OFFSET(FNDECL) \
|
||
+ (crtl->args.pretend_args_size - STACK_POINTER_OFFSET)
|
||
|
||
|
||
/* Stack Checking */
|
||
--- a/gcc/config.gcc
|
||
+++ b/gcc/config.gcc
|
||
@@ -1088,7 +1088,7 @@
|
||
tmake_file="${tmake_file} i386/t-linux64"
|
||
need_64bit_hwint=yes
|
||
case X"${with_cpu}" in
|
||
- Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
+ Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
;;
|
||
X)
|
||
if test x$with_cpu_64 = x; then
|
||
@@ -1097,7 +1097,7 @@
|
||
;;
|
||
*)
|
||
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
|
||
- echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
+ echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
exit 1
|
||
;;
|
||
esac
|
||
@@ -1202,7 +1202,7 @@
|
||
# libgcc/configure.ac instead.
|
||
need_64bit_hwint=yes
|
||
case X"${with_cpu}" in
|
||
- Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
+ Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
;;
|
||
X)
|
||
if test x$with_cpu_64 = x; then
|
||
@@ -1211,7 +1211,7 @@
|
||
;;
|
||
*)
|
||
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
|
||
- echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
+ echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
exit 1
|
||
;;
|
||
esac
|
||
@@ -1566,6 +1566,7 @@
|
||
tm_defines="${tm_defines} MIPS_ISA_DEFAULT=65"
|
||
;;
|
||
esac
|
||
+ tmake_file="$tmake_file mips/t-crtfm"
|
||
gnu_ld=yes
|
||
gas=yes
|
||
test x$with_llsc != x || with_llsc=yes
|
||
@@ -1581,6 +1582,7 @@
|
||
tm_defines="${tm_defines} MIPS_ISA_DEFAULT=32"
|
||
esac
|
||
test x$with_llsc != x || with_llsc=yes
|
||
+ tmake_file="$tmake_file mips/t-crtfm"
|
||
;;
|
||
mips*-*-openbsd*)
|
||
tm_defines="${tm_defines} OBSD_HAS_DECLARE_FUNCTION_NAME OBSD_HAS_DECLARE_OBJECT OBSD_HAS_CORRECT_SPECS"
|
||
@@ -1796,6 +1798,10 @@
|
||
tm_file="${tm_file} dbxelf.h elfos.h usegas.h svr4.h freebsd-spec.h rs6000/sysv4.h"
|
||
extra_options="${extra_options} rs6000/sysv4.opt"
|
||
tmake_file="rs6000/t-fprules rs6000/t-fprules-fpbit rs6000/t-ppcgas rs6000/t-ppccomm"
|
||
+ if test x$enable_powerpc_e500mc_elf = xyes; then
|
||
+ tm_file="${tm_file} rs6000/e500mc.h"
|
||
+ tmake_file="${tmake_file} rs6000/t-ppc-e500mc"
|
||
+ fi
|
||
;;
|
||
powerpc-*-eabialtivec*)
|
||
tm_file="${tm_file} dbxelf.h elfos.h svr4.h freebsd-spec.h rs6000/sysv4.h rs6000/eabi.h rs6000/e500.h rs6000/eabialtivec.h"
|
||
@@ -2420,6 +2426,8 @@
|
||
i[34567]86-*-* | x86_64-*-*)
|
||
tmake_file="${tmake_file} i386/t-gmm_malloc i386/t-i386"
|
||
;;
|
||
+powerpc*-*-* | rs6000-*-*)
|
||
+ tm_file="${tm_file} rs6000/option-defaults.h"
|
||
esac
|
||
|
||
# Support for --with-cpu and related options (and a few unrelated options,
|
||
@@ -2646,8 +2654,8 @@
|
||
| armv[23456] | armv2a | armv3m | armv4t | armv5t \
|
||
| armv5te | armv6j |armv6k | armv6z | armv6zk | armv6-m \
|
||
| armv7 | armv7-a | armv7-r | armv7-m \
|
||
- | iwmmxt | ep9312)
|
||
- # OK
|
||
+ | iwmmxt | ep9312 | marvell-f )
|
||
+ # OK
|
||
;;
|
||
*)
|
||
echo "Unknown arch used in --with-arch=$with_arch" 1>&2
|
||
@@ -2668,7 +2676,10 @@
|
||
|
||
case "$with_fpu" in
|
||
"" \
|
||
- | fpa | fpe2 | fpe3 | maverick | vfp | vfp3 | neon )
|
||
+ | fpa | fpe2 | fpe3 | maverick \
|
||
+ | vfp | vfp3 | vfpv3 | vfpv3-fp16 | vfpv3-d16 \
|
||
+ | vfpv3-d16-fp16 | vfpv4 | vfpv4-d16 | fpv4-sp-d16 \
|
||
+ | neon | neon-fp16 | neon-vfpv4 )
|
||
# OK
|
||
;;
|
||
*)
|
||
@@ -2805,7 +2816,7 @@
|
||
esac
|
||
# OK
|
||
;;
|
||
- "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
|
||
+ "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic)
|
||
# OK
|
||
;;
|
||
*)
|
||
@@ -2817,7 +2828,7 @@
|
||
;;
|
||
|
||
mips*-*-*)
|
||
- supported_defaults="abi arch float tune divide llsc mips-plt"
|
||
+ supported_defaults="abi arch arch_32 arch_64 float tune tune_32 tune_64 divide llsc mips-plt"
|
||
|
||
case ${with_float} in
|
||
"" | soft | hard)
|
||
@@ -2882,12 +2893,20 @@
|
||
;;
|
||
|
||
powerpc*-*-* | rs6000-*-*)
|
||
- supported_defaults="cpu float tune"
|
||
+ supported_defaults="cpu cpu_32 cpu_64 float tune tune_32 tune_64"
|
||
|
||
- for which in cpu tune; do
|
||
+ for which in cpu cpu_32 cpu_64 tune tune_32 tune_64; do
|
||
eval "val=\$with_$which"
|
||
case ${val} in
|
||
default32 | default64)
|
||
+ case $which in
|
||
+ cpu | tune)
|
||
+ ;;
|
||
+ *)
|
||
+ echo "$val only valid for --with-cpu and --with-tune." 1>&2
|
||
+ exit 1
|
||
+ ;;
|
||
+ esac
|
||
with_which="with_$which"
|
||
eval $with_which=
|
||
;;
|
||
--- a/gcc/config.in
|
||
+++ b/gcc/config.in
|
||
@@ -108,6 +108,12 @@
|
||
#endif
|
||
|
||
|
||
+/* Define to warn for use of native system header directories */
|
||
+#ifndef USED_FOR_TARGET
|
||
+#undef ENABLE_POISON_SYSTEM_DIRECTORIES
|
||
+#endif
|
||
+
|
||
+
|
||
/* Define if you want all operations on RTL (the basic data structure of the
|
||
optimizer and back end) to be checked for dynamic type safety at runtime.
|
||
This is quite expensive. */
|
||
@@ -821,6 +827,13 @@
|
||
#endif
|
||
|
||
|
||
+/* Define if your assembler supports specifying the alignment of objects
|
||
+ allocated using the GAS .comm command. */
|
||
+#ifndef USED_FOR_TARGET
|
||
+#undef HAVE_GAS_ALIGNED_COMM
|
||
+#endif
|
||
+
|
||
+
|
||
/* Define if your assembler supports .balign and .p2align. */
|
||
#ifndef USED_FOR_TARGET
|
||
#undef HAVE_GAS_BALIGN_AND_P2ALIGN
|
||
--- a/gcc/configure
|
||
+++ b/gcc/configure
|
||
@@ -458,7 +458,7 @@
|
||
# include <unistd.h>
|
||
#endif"
|
||
|
||
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os target_noncanonical build_libsubdir build_subdir host_subdir target_subdir GENINSRC CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT GNATBIND ac_ct_GNATBIND GNATMAKE ac_ct_GNATMAKE NO_MINUS_C_MINUS_O OUTPUT_OPTION CPP EGREP loose_warn strict_warn warn_cflags nocommon_flag TREEBROWSER valgrind_path valgrind_path_defines valgrind_command coverage_flags enable_multilib enable_decimal_float enable_fixed_point enable_shared TARGET_SYSTEM_ROOT TARGET_SYSTEM_ROOT_DEFINE CROSS_SYSTEM_HEADER_DIR onestep PKGVERSION REPORT_BUGS_TO REPORT_BUGS_TEXI datarootdir docdir htmldir SET_MAKE AWK LN_S LN RANLIB ac_ct_RANLIB ranlib_flags INSTALL INSTALL_PROGRAM INSTALL_DATA make_compare_target have_mktemp_command MAKEINFO BUILD_INFO GENERATED_MANPAGES FLEX BISON NM AR COLLECT2_LIBS GNAT_LIBEXC LDEXP_LIB TARGET_GETGROUPS_T LIBICONV LTLIBICONV LIBICONV_DEP manext objext gthread_flags extra_modes_file extra_opt_files USE_NLS LIBINTL LIBINTL_DEP INCINTL XGETTEXT GMSGFMT POSUB CATALOGS DATADIRNAME INSTOBJEXT GENCAT CATOBJEXT CROSS ALL SYSTEM_HEADER_DIR inhibit_libc CC_FOR_BUILD BUILD_CFLAGS BUILD_LDFLAGS STMP_FIXINC STMP_FIXPROTO collect2 LIBTOOL SED FGREP GREP LD DUMPBIN ac_ct_DUMPBIN OBJDUMP ac_ct_OBJDUMP ac_ct_AR STRIP ac_ct_STRIP lt_ECHO DSYMUTIL ac_ct_DSYMUTIL NMEDIT ac_ct_NMEDIT LIPO ac_ct_LIPO OTOOL ac_ct_OTOOL OTOOL64 ac_ct_OTOOL64 objdir enable_fast_install gcc_cv_as ORIGINAL_AS_FOR_TARGET gcc_cv_ld ORIGINAL_LD_FOR_TARGET gcc_cv_nm ORIGINAL_NM_FOR_TARGET gcc_cv_objdump gcc_cv_readelf libgcc_visibility GGC zlibdir zlibinc MAINT gcc_tooldir dollar slibdir subdirs srcdir all_compilers all_gtfiles all_lang_makefrags all_lang_makefiles all_languages all_selected_languages build_exeext build_install_headers_dir build_xm_file_list build_xm_include_list build_xm_defines build_file_translate check_languages cpp_install_dir xmake_file tmake_file extra_gcc_objs extra_headers_list extra_objs extra_parts extra_passes extra_programs float_h_file gcc_config_arguments gcc_gxx_include_dir host_exeext host_xm_file_list host_xm_include_list host_xm_defines out_host_hook_obj install lang_opt_files lang_specs_files lang_tree_files local_prefix md_file objc_boehm_gc out_file out_object_file thread_file tm_file_list tm_include_list tm_defines tm_p_file_list tm_p_include_list xm_file_list xm_include_list xm_defines c_target_objs cxx_target_objs fortran_target_objs target_cpu_default GMPLIBS GMPINC PPLLIBS PPLINC CLOOGLIBS CLOOGINC LIBOBJS LTLIBOBJS'
|
||
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os target_noncanonical licensedir build_libsubdir build_subdir host_subdir target_subdir GENINSRC CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT GNATBIND ac_ct_GNATBIND GNATMAKE ac_ct_GNATMAKE NO_MINUS_C_MINUS_O OUTPUT_OPTION CPP EGREP loose_warn strict_warn warn_cflags nocommon_flag TREEBROWSER valgrind_path valgrind_path_defines valgrind_command coverage_flags enable_multilib enable_decimal_float enable_fixed_point enable_shared TARGET_SYSTEM_ROOT TARGET_SYSTEM_ROOT_DEFINE CROSS_SYSTEM_HEADER_DIR EGLIBC_CONFIGS CONFIGURE_SPECS onestep PKGVERSION REPORT_BUGS_TO REPORT_BUGS_TEXI datarootdir docdir htmldir SET_MAKE AWK LN_S LN RANLIB ac_ct_RANLIB ranlib_flags INSTALL INSTALL_PROGRAM INSTALL_DATA make_compare_target have_mktemp_command MAKEINFO BUILD_INFO GENERATED_MANPAGES FLEX BISON NM AR COLLECT2_LIBS GNAT_LIBEXC LDEXP_LIB TARGET_GETGROUPS_T LIBICONV LTLIBICONV LIBICONV_DEP manext objext gthread_flags extra_modes_file extra_opt_files USE_NLS LIBINTL LIBINTL_DEP INCINTL XGETTEXT GMSGFMT POSUB CATALOGS DATADIRNAME INSTOBJEXT GENCAT CATOBJEXT CROSS ALL SYSTEM_HEADER_DIR inhibit_libc CC_FOR_BUILD BUILD_CFLAGS BUILD_LDFLAGS STMP_FIXINC STMP_FIXPROTO collect2 LIBTOOL SED FGREP GREP LD DUMPBIN ac_ct_DUMPBIN OBJDUMP ac_ct_OBJDUMP ac_ct_AR STRIP ac_ct_STRIP lt_ECHO DSYMUTIL ac_ct_DSYMUTIL NMEDIT ac_ct_NMEDIT LIPO ac_ct_LIPO OTOOL ac_ct_OTOOL OTOOL64 ac_ct_OTOOL64 objdir enable_fast_install gcc_cv_as ORIGINAL_AS_FOR_TARGET gcc_cv_ld ORIGINAL_LD_FOR_TARGET gcc_cv_nm ORIGINAL_NM_FOR_TARGET gcc_cv_objdump gcc_cv_readelf libgcc_visibility GGC zlibdir zlibinc MAINT gcc_tooldir dollar slibdir subdirs srcdir all_compilers all_gtfiles all_lang_makefrags all_lang_makefiles all_languages all_selected_languages build_exeext build_install_headers_dir build_xm_file_list build_xm_include_list build_xm_defines build_file_translate check_languages cpp_install_dir xmake_file tmake_file TM_ENDIAN_CONFIG TM_MULTILIB_CONFIG TM_MULTILIB_EXCEPTIONS_CONFIG extra_gcc_objs extra_headers_list extra_objs extra_parts extra_passes extra_programs float_h_file gcc_config_arguments gcc_gxx_include_dir host_exeext host_xm_file_list host_xm_include_list host_xm_defines out_host_hook_obj install lang_opt_files lang_specs_files lang_tree_files local_prefix md_file objc_boehm_gc out_file out_object_file thread_file tm_file_list tm_include_list tm_defines tm_p_file_list tm_p_include_list xm_file_list xm_include_list xm_defines c_target_objs cxx_target_objs fortran_target_objs target_cpu_default GMPLIBS GMPINC PPLLIBS PPLINC CLOOGLIBS CLOOGINC LIBOBJS LTLIBOBJS'
|
||
ac_subst_files='language_hooks'
|
||
ac_pwd=`pwd`
|
||
|
||
@@ -1084,6 +1084,8 @@
|
||
--enable-version-specific-runtime-libs
|
||
specify that runtime libraries should be
|
||
installed in a compiler-specific directory
|
||
+ --enable-poison-system-directories
|
||
+ warn for use of native system header directories
|
||
|
||
Optional Packages:
|
||
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
|
||
@@ -22108,6 +22110,22 @@
|
||
tls_first_minor=16
|
||
tls_as_opt='-32 --fatal-warnings'
|
||
;;
|
||
+ m68k-*-*)
|
||
+ conftest_s='
|
||
+ .section .tdata,"awT",@progbits
|
||
+x:
|
||
+ .word 2
|
||
+ .text
|
||
+foo:
|
||
+ move.l x@TLSGD(%a5),%a0
|
||
+ move.l x@TLSLDM(%a5),%a0
|
||
+ move.l x@TLSLDO(%a5),%a0
|
||
+ move.l x@TLSIE(%a5),%a0
|
||
+ move.l x@TLSLE(%a5),%a0'
|
||
+ tls_first_major=2
|
||
+ tls_first_minor=19
|
||
+ tls_as_opt='--fatal-warnings'
|
||
+ ;;
|
||
powerpc-*-*)
|
||
conftest_s='
|
||
.section ".tdata","awT",@progbits
|
||
@@ -22739,6 +22757,44 @@
|
||
i[34567]86-*-* | x86_64-*-*)
|
||
case $target_os in
|
||
cygwin* | pe | mingw32*)
|
||
+ # Recent binutils allows the three-operand form of ".comm" on PE. This
|
||
+ # definition is used unconditionally to initialise the default state of
|
||
+ # the target option variable that governs usage of the feature.
|
||
+ echo "$as_me:$LINENO: checking assembler for .comm with alignment" >&5
|
||
+echo $ECHO_N "checking assembler for .comm with alignment... $ECHO_C" >&6
|
||
+if test "${gcc_cv_as_comm_has_align+set}" = set; then
|
||
+ echo $ECHO_N "(cached) $ECHO_C" >&6
|
||
+else
|
||
+ gcc_cv_as_comm_has_align=no
|
||
+ if test $in_tree_gas = yes; then
|
||
+ if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 19 \) \* 1000 + 52`
|
||
+ then gcc_cv_as_comm_has_align=yes
|
||
+fi
|
||
+ elif test x$gcc_cv_as != x; then
|
||
+ echo '.comm foo,1,32' > conftest.s
|
||
+ if { ac_try='$gcc_cv_as -o conftest.o conftest.s >&5'
|
||
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
|
||
+ (eval $ac_try) 2>&5
|
||
+ ac_status=$?
|
||
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||
+ (exit $ac_status); }; }
|
||
+ then
|
||
+ gcc_cv_as_comm_has_align=yes
|
||
+ else
|
||
+ echo "configure: failed program was" >&5
|
||
+ cat conftest.s >&5
|
||
+ fi
|
||
+ rm -f conftest.o conftest.s
|
||
+ fi
|
||
+fi
|
||
+echo "$as_me:$LINENO: result: $gcc_cv_as_comm_has_align" >&5
|
||
+echo "${ECHO_T}$gcc_cv_as_comm_has_align" >&6
|
||
+
|
||
+
|
||
+cat >>confdefs.h <<_ACEOF
|
||
+#define HAVE_GAS_ALIGNED_COMM `if test $gcc_cv_as_comm_has_align = yes; then echo 1; else echo 0; fi`
|
||
+_ACEOF
|
||
+
|
||
# Used for DWARF 2 in PE
|
||
echo "$as_me:$LINENO: checking assembler for .secrel32 relocs" >&5
|
||
echo $ECHO_N "checking assembler for .secrel32 relocs... $ECHO_C" >&6
|
||
@@ -24711,6 +24767,21 @@
|
||
fi;
|
||
|
||
|
||
+# Check whether --enable-poison-system-directories or --disable-poison-system-directories was given.
|
||
+if test "${enable_poison_system_directories+set}" = set; then
|
||
+ enableval="$enable_poison_system_directories"
|
||
+
|
||
+else
|
||
+ enable_poison_system_directories=no
|
||
+fi;
|
||
+if test "x${enable_poison_system_directories}" = "xyes"; then
|
||
+
|
||
+cat >>confdefs.h <<\_ACEOF
|
||
+#define ENABLE_POISON_SYSTEM_DIRECTORIES 1
|
||
+_ACEOF
|
||
+
|
||
+fi
|
||
+
|
||
# Substitute configuration variables
|
||
|
||
|
||
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -2576,6 +2576,22 @@
tls_first_minor=16
tls_as_opt='-32 --fatal-warnings'
;;
+ m68k-*-*)
+ conftest_s='
+ .section .tdata,"awT",@progbits
+x:
+ .word 2
+ .text
+foo:
+ move.l x@TLSGD(%a5),%a0
+ move.l x@TLSLDM(%a5),%a0
+ move.l x@TLSLDO(%a5),%a0
+ move.l x@TLSIE(%a5),%a0
+ move.l x@TLSLE(%a5),%a0'
+ tls_first_major=2
+ tls_first_minor=19
+ tls_as_opt='--fatal-warnings'
+ ;;
powerpc-*-*)
conftest_s='
.section ".tdata","awT",@progbits
@@ -2944,6 +2960,15 @@
changequote([,])dnl
case $target_os in
cygwin* | pe | mingw32*)
+ # Recent binutils allows the three-operand form of ".comm" on PE. This
+ # definition is used unconditionally to initialise the default state of
+ # the target option variable that governs usage of the feature.
+ gcc_GAS_CHECK_FEATURE([.comm with alignment], gcc_cv_as_comm_has_align,
+ [2,19,52],,[.comm foo,1,32])
+ AC_DEFINE_UNQUOTED(HAVE_GAS_ALIGNED_COMM,
+ [`if test $gcc_cv_as_comm_has_align = yes; then echo 1; else echo 0; fi`],
+ [Define if your assembler supports specifying the alignment
+ of objects allocated using the GAS .comm command.])
# Used for DWARF 2 in PE
gcc_GAS_CHECK_FEATURE([.secrel32 relocs],
gcc_cv_as_ix86_pe_secrel32,
@@ -3891,6 +3916,16 @@
htmldir='$(docdir)')
AC_SUBST(htmldir)
+AC_ARG_ENABLE([poison-system-directories],
+ AS_HELP_STRING([--enable-poison-system-directories],
+ [warn for use of native system header directories]),,
+ [enable_poison_system_directories=no])
+if test "x${enable_poison_system_directories}" = "xyes"; then
+ AC_DEFINE([ENABLE_POISON_SYSTEM_DIRECTORIES],
+ [1],
+ [Define to warn for use of native system header directories])
+fi
+
# Substitute configuration variables
AC_SUBST(subdirs)
AC_SUBST(srcdir)
--- a/gcc/c.opt
+++ b/gcc/c.opt
@@ -716,6 +716,10 @@
C ObjC C++ ObjC++
Treat the input file as already preprocessed
+fremove-local-statics
+C C++ Var(flag_remove_local_statics) Optimization
+Convert function-local static variables to automatic variables when it is safe to do so
+
freplace-objc-classes
ObjC ObjC++
Used in Fix-and-Continue mode to indicate that object files may be swapped in at runtime
--- a/gcc/c-opts.c
|
||
+++ b/gcc/c-opts.c
|
||
@@ -40,6 +40,7 @@
|
||
#include "mkdeps.h"
|
||
#include "target.h"
|
||
#include "tm_p.h"
|
||
+#include "c-tree.h" /* For c_cpp_error. */
|
||
|
||
#ifndef DOLLARS_IN_IDENTIFIERS
|
||
# define DOLLARS_IN_IDENTIFIERS true
|
||
@@ -201,6 +202,7 @@
|
||
{
|
||
static const unsigned int lang_flags[] = {CL_C, CL_ObjC, CL_CXX, CL_ObjCXX};
|
||
unsigned int i, result;
|
||
+ struct cpp_callbacks *cb;
|
||
|
||
/* This is conditionalized only because that is the way the front
|
||
ends used to do it. Maybe this should be unconditional? */
|
||
@@ -216,6 +218,8 @@
|
||
|
||
parse_in = cpp_create_reader (c_dialect_cxx () ? CLK_GNUCXX: CLK_GNUC89,
|
||
ident_hash, line_table);
|
||
+ cb = cpp_get_callbacks (parse_in);
|
||
+ cb->error = c_cpp_error;
|
||
|
||
cpp_opts = cpp_get_options (parse_in);
|
||
cpp_opts->dollars_in_ident = DOLLARS_IN_IDENTIFIERS;
|
||
@@ -333,12 +337,12 @@
|
||
or environment var dependency generation is used. */
|
||
cpp_opts->deps.style = (code == OPT_M ? DEPS_SYSTEM: DEPS_USER);
|
||
flag_no_output = 1;
|
||
- cpp_opts->inhibit_warnings = 1;
|
||
break;
|
||
|
||
case OPT_MD:
|
||
case OPT_MMD:
|
||
cpp_opts->deps.style = (code == OPT_MD ? DEPS_SYSTEM: DEPS_USER);
|
||
+ cpp_opts->deps.need_preprocessor_output = true;
|
||
deps_file = arg;
|
||
break;
|
||
|
||
@@ -444,7 +448,6 @@
|
||
break;
|
||
|
||
case OPT_Werror:
|
||
- cpp_opts->warnings_are_errors = value;
|
||
global_dc->warning_as_error_requested = value;
|
||
break;
|
||
|
||
@@ -503,10 +506,6 @@
|
||
warn_strict_null_sentinel = value;
|
||
break;
|
||
|
||
- case OPT_Wsystem_headers:
|
||
- cpp_opts->warn_system_headers = value;
|
||
- break;
|
||
-
|
||
case OPT_Wtraditional:
|
||
cpp_opts->warn_traditional = value;
|
||
break;
|
||
@@ -895,8 +894,6 @@
|
||
c_common_post_options, so that a subsequent -Wno-endif-labels
|
||
is not overridden. */
|
||
case OPT_pedantic_errors:
|
||
- cpp_opts->pedantic_errors = 1;
|
||
- /* Fall through. */
|
||
case OPT_pedantic:
|
||
cpp_opts->pedantic = 1;
|
||
cpp_opts->warn_endif_labels = 1;
|
||
@@ -971,10 +968,6 @@
|
||
flag_undef = 1;
|
||
break;
|
||
|
||
- case OPT_w:
|
||
- cpp_opts->inhibit_warnings = 1;
|
||
- break;
|
||
-
|
||
case OPT_v:
|
||
verbose = true;
|
||
break;
|
||
@@ -1159,10 +1152,6 @@
|
||
|
||
input_location = UNKNOWN_LOCATION;
|
||
|
||
- /* If an error has occurred in cpplib, note it so we fail
|
||
- immediately. */
|
||
- errorcount += cpp_errors (parse_in);
|
||
-
|
||
*pfilename = this_input_filename
|
||
= cpp_read_main_file (parse_in, in_fnames[0]);
|
||
/* Don't do any compilation or preprocessing if there is no input file. */
|
||
@@ -1274,7 +1263,8 @@
|
||
{
|
||
FILE *deps_stream = NULL;
|
||
|
||
- if (cpp_opts->deps.style != DEPS_NONE)
|
||
+ /* Don't write the deps file if there are errors. */
|
||
+ if (cpp_opts->deps.style != DEPS_NONE && errorcount == 0)
|
||
{
|
||
/* If -M or -MM was seen without -MF, default output to the
|
||
output stream. */
|
||
@@ -1290,7 +1280,7 @@
|
||
|
||
/* For performance, avoid tearing down cpplib's internal structures
|
||
with cpp_destroy (). */
|
||
- errorcount += cpp_finish (parse_in, deps_stream);
|
||
+ cpp_finish (parse_in, deps_stream);
|
||
|
||
if (deps_stream && deps_stream != out_stream
|
||
&& (ferror (deps_stream) || fclose (deps_stream)))
|
||
--- a/gcc/cp/class.c
|
||
+++ b/gcc/cp/class.c
|
||
@@ -6136,7 +6136,7 @@
|
||
if (flags & tf_error)
|
||
{
|
||
error ("no matches converting function %qD to type %q#T",
|
||
- DECL_NAME (OVL_FUNCTION (overload)),
|
||
+ DECL_NAME (OVL_CURRENT (overload)),
|
||
target_type);
|
||
|
||
/* print_candidates expects a chain with the functions in
|
||
@@ -6299,13 +6299,8 @@
|
||
dependent on overload resolution. */
|
||
gcc_assert (TREE_CODE (rhs) == ADDR_EXPR
|
||
|| TREE_CODE (rhs) == COMPONENT_REF
|
||
- || TREE_CODE (rhs) == COMPOUND_EXPR
|
||
- || really_overloaded_fn (rhs));
|
||
-
|
||
- /* We don't overwrite rhs if it is an overloaded function.
|
||
- Copying it would destroy the tree link. */
|
||
- if (TREE_CODE (rhs) != OVERLOAD)
|
||
- rhs = copy_node (rhs);
|
||
+ || really_overloaded_fn (rhs)
|
||
+ || (flag_ms_extensions && TREE_CODE (rhs) == FUNCTION_DECL));
|
||
|
||
/* This should really only be used when attempting to distinguish
|
||
what sort of a pointer to function we have. For now, any
|
||
@@ -6357,19 +6352,6 @@
|
||
/*explicit_targs=*/NULL_TREE,
|
||
access_path);
|
||
|
||
- case COMPOUND_EXPR:
|
||
- TREE_OPERAND (rhs, 0)
|
||
- = instantiate_type (lhstype, TREE_OPERAND (rhs, 0), flags);
|
||
- if (TREE_OPERAND (rhs, 0) == error_mark_node)
|
||
- return error_mark_node;
|
||
- TREE_OPERAND (rhs, 1)
|
||
- = instantiate_type (lhstype, TREE_OPERAND (rhs, 1), flags);
|
||
- if (TREE_OPERAND (rhs, 1) == error_mark_node)
|
||
- return error_mark_node;
|
||
-
|
||
- TREE_TYPE (rhs) = lhstype;
|
||
- return rhs;
|
||
-
|
||
case ADDR_EXPR:
|
||
{
|
||
if (PTRMEM_OK_P (rhs))
|
||
--- a/gcc/cp/cp-tree.h
|
||
+++ b/gcc/cp/cp-tree.h
|
||
@@ -43,9 +43,6 @@
|
||
#else
|
||
#define ATTRIBUTE_GCC_CXXDIAG(m, n) ATTRIBUTE_NONNULL(m)
|
||
#endif
|
||
-extern void cp_cpp_error (cpp_reader *, int,
|
||
- const char *, va_list *)
|
||
- ATTRIBUTE_GCC_CXXDIAG(3,0);
|
||
#ifdef GCC_TOPLEV_H
|
||
#error \
|
||
In order for the format checking to accept the C++ front end diagnostic \
|
||
--- a/gcc/cp/cvt.c
|
||
+++ b/gcc/cp/cvt.c
|
||
@@ -581,6 +581,7 @@
|
||
tree e = expr;
|
||
enum tree_code code = TREE_CODE (type);
|
||
const char *invalid_conv_diag;
|
||
+ tree e1;
|
||
|
||
if (error_operand_p (e) || type == error_mark_node)
|
||
return error_mark_node;
|
||
@@ -629,6 +630,10 @@
|
||
}
|
||
}
|
||
|
||
+ e1 = targetm.convert_to_type (type, e);
|
||
+ if (e1)
|
||
+ return e1;
|
||
+
|
||
if (code == VOID_TYPE && (convtype & CONV_STATIC))
|
||
{
|
||
e = convert_to_void (e, /*implicit=*/NULL, tf_warning_or_error);
|
||
@@ -1231,11 +1236,18 @@
|
||
tree
|
||
type_promotes_to (tree type)
|
||
{
|
||
+ tree promoted_type;
|
||
+
|
||
if (type == error_mark_node)
|
||
return error_mark_node;
|
||
|
||
type = TYPE_MAIN_VARIANT (type);
|
||
|
||
+ /* Check for promotions of target-defined types first. */
|
||
+ promoted_type = targetm.promoted_type (type);
|
||
+ if (promoted_type)
|
||
+ return promoted_type;
|
||
+
|
||
/* bool always promotes to int (not unsigned), even if it's the same
|
||
size. */
|
||
if (type == boolean_type_node)
|
||
--- a/gcc/cp/decl2.c
|
||
+++ b/gcc/cp/decl2.c
|
||
@@ -1717,6 +1717,10 @@
|
||
|| (DECL_ASSEMBLER_NAME_SET_P (decl)
|
||
&& TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))))
|
||
return true;
|
||
+ /* Functions marked "dllexport" must be emitted so that they are
|
||
+ visible to other DLLs. */
|
||
+ if (lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)))
|
||
+ return true;
|
||
/* Otherwise, DECL does not need to be emitted -- yet. A subsequent
|
||
reference to DECL might cause it to be emitted later. */
|
||
return false;
|
||
@@ -3802,6 +3806,15 @@
|
||
}
|
||
|
||
TREE_USED (decl) = 1;
|
||
+ if (current_function_decl != NULL_TREE
|
||
+ && (TREE_CODE (decl) == VAR_DECL
|
||
+ || TREE_CODE (decl) == PARM_DECL
|
||
+ || TREE_CODE (decl) == FUNCTION_DECL))
|
||
+ {
|
||
+ tree context = decl_function_context (decl);
|
||
+ if (context != NULL_TREE && context != current_function_decl)
|
||
+ DECL_NONLOCAL (decl) = 1;
|
||
+ }
|
||
if (DECL_CLONED_FUNCTION_P (decl))
|
||
TREE_USED (DECL_CLONED_FUNCTION (decl)) = 1;
|
||
if (TREE_CODE (decl) == FUNCTION_DECL
|
||
--- a/gcc/cp/decl.c
|
||
+++ b/gcc/cp/decl.c
|
||
@@ -4515,7 +4515,7 @@
|
||
|
||
cp_apply_type_quals_to_decl (cp_type_quals (TREE_TYPE (decl)), decl);
|
||
|
||
- layout_decl (decl, 0);
|
||
+ relayout_decl (decl);
|
||
}
|
||
}
|
||
|
||
@@ -7620,6 +7620,7 @@
|
||
bool parameter_pack_p = declarator? declarator->parameter_pack_p : false;
|
||
bool set_no_warning = false;
|
||
bool template_type_arg = false;
|
||
+ const char *errmsg;
|
||
|
||
signed_p = declspecs->specs[(int)ds_signed];
|
||
unsigned_p = declspecs->specs[(int)ds_unsigned];
|
||
@@ -8299,6 +8300,12 @@
|
||
type_quals = TYPE_UNQUALIFIED;
|
||
set_no_warning = true;
|
||
}
|
||
+ errmsg = targetm.invalid_return_type (type);
|
||
+ if (errmsg)
|
||
+ {
|
||
+ error (errmsg);
|
||
+ type = integer_type_node;
|
||
+ }
|
||
|
||
/* Error about some types functions can't return. */
|
||
|
||
@@ -8841,8 +8848,13 @@
|
||
|
||
/* Replace the anonymous name with the real name everywhere. */
|
||
for (t = TYPE_MAIN_VARIANT (type); t; t = TYPE_NEXT_VARIANT (t))
|
||
- if (TYPE_NAME (t) == oldname)
|
||
- TYPE_NAME (t) = decl;
|
||
+ {
|
||
+ if (ANON_AGGRNAME_P (TYPE_IDENTIFIER (t)))
|
||
+ {
|
||
+ debug_hooks->set_name (t, decl);
|
||
+ TYPE_NAME (t) = decl;
|
||
+ }
|
||
+ }
|
||
|
||
if (TYPE_LANG_SPECIFIC (type))
|
||
TYPE_WAS_ANONYMOUS (type) = 1;
|
||
@@ -9679,6 +9691,7 @@
|
||
tree type = NULL_TREE;
|
||
tree init = TREE_PURPOSE (parm);
|
||
tree decl = TREE_VALUE (parm);
|
||
+ const char *errmsg;
|
||
|
||
if (parm == void_list_node)
|
||
break;
|
||
@@ -9712,6 +9725,14 @@
|
||
init = NULL_TREE;
|
||
}
|
||
|
||
+ if (type != error_mark_node
|
||
+ && (errmsg = targetm.invalid_parameter_type (type)))
|
||
+ {
|
||
+ error (errmsg);
|
||
+ type = error_mark_node;
|
||
+ TREE_TYPE (decl) = error_mark_node;
|
||
+ }
|
||
+
|
||
if (type != error_mark_node)
|
||
{
|
||
if (deprecated_state != DEPRECATED_SUPPRESS)
|
||
--- a/gcc/cp/error.c
|
||
+++ b/gcc/cp/error.c
|
||
@@ -2667,39 +2667,6 @@
|
||
#undef next_int
|
||
}
|
||
|
||
-/* Callback from cpp_error for PFILE to print diagnostics arising from
|
||
- interpreting strings. The diagnostic is of type LEVEL; MSG is the
|
||
- translated message and AP the arguments. */
|
||
-
|
||
-void
|
||
-cp_cpp_error (cpp_reader *pfile ATTRIBUTE_UNUSED, int level,
|
||
- const char *msg, va_list *ap)
|
||
-{
|
||
- diagnostic_info diagnostic;
|
||
- diagnostic_t dlevel;
|
||
- switch (level)
|
||
- {
|
||
- case CPP_DL_WARNING:
|
||
- case CPP_DL_WARNING_SYSHDR:
|
||
- dlevel = DK_WARNING;
|
||
- break;
|
||
- case CPP_DL_PEDWARN:
|
||
- dlevel = DK_PEDWARN;
|
||
- break;
|
||
- case CPP_DL_ERROR:
|
||
- dlevel = DK_ERROR;
|
||
- break;
|
||
- case CPP_DL_ICE:
|
||
- dlevel = DK_ICE;
|
||
- break;
|
||
- default:
|
||
- gcc_unreachable ();
|
||
- }
|
||
- diagnostic_set_info_translated (&diagnostic, msg, ap,
|
||
- input_location, dlevel);
|
||
- report_diagnostic (&diagnostic);
|
||
-}
|
||
-
|
||
/* Warn about the use of C++0x features when appropriate. */
|
||
void
|
||
maybe_warn_cpp0x (const char* str)
|
||
--- a/gcc/cp/except.c
|
||
+++ b/gcc/cp/except.c
|
||
@@ -146,14 +146,26 @@
|
||
static tree
|
||
build_eh_type_type (tree type)
|
||
{
|
||
- tree exp = eh_type_info (type);
|
||
+ bool is_ref = TREE_CODE (type) == REFERENCE_TYPE;
|
||
+ tree exp;
|
||
+
|
||
+ if (is_ref)
|
||
+ type = TREE_TYPE (type);
|
||
+
|
||
+ exp = eh_type_info (type);
|
||
|
||
if (!exp)
|
||
return NULL;
|
||
|
||
mark_used (exp);
|
||
|
||
- return convert (ptr_type_node, build_address (exp));
|
||
+ exp = build_address (exp);
|
||
+
|
||
+ if (is_ref)
|
||
+ exp = targetm.cxx.ttype_ref_encode (exp);
|
||
+
|
||
+ exp = convert (ptr_type_node, exp);
|
||
+ return exp;
|
||
}
|
||
|
||
tree
|
||
@@ -495,6 +507,16 @@
|
||
initialize_handler_parm (decl, exp);
|
||
}
|
||
|
||
+ /* Preserve the reference type on the exception, as this affects
|
||
+ derived-to-base conversions in catch matching. Only do this when
|
||
+ the ABI supports it, as originally this case was (incorrectly)
|
||
+ treated just as catching a pointer-to-class by value. */
|
||
+ if (targetm.cxx.ttype_ref_encode
|
||
+ && decl && TREE_CODE (type) == POINTER_TYPE
|
||
+ && CLASS_TYPE_P (TREE_TYPE (type))
|
||
+ && TREE_CODE (TREE_TYPE (decl)) == REFERENCE_TYPE)
|
||
+ type = build_reference_type (type);
|
||
+
|
||
return type;
|
||
}
|
||
|
||
@@ -538,10 +560,20 @@
|
||
raw_raises && TREE_VALUE (raw_raises);
|
||
raw_raises = TREE_CHAIN (raw_raises))
|
||
{
|
||
- tree type = prepare_eh_type (TREE_VALUE (raw_raises));
|
||
+ tree orig_type = TREE_VALUE (raw_raises);
|
||
+ tree type = prepare_eh_type (orig_type);
|
||
tree tinfo = eh_type_info (type);
|
||
|
||
mark_used (tinfo);
|
||
+ /* Preserve the reference type on the exception, as this affects
|
||
+ derived-to-base conversions in catch matching. Only do this when
|
||
+ the ABI supports it, as originally this case was (incorrectly)
|
||
+ treated just as catching a pointer-to-class by value. */
|
||
+ if (targetm.cxx.ttype_ref_encode
|
||
+ && TREE_CODE (orig_type) == REFERENCE_TYPE
|
||
+ && TREE_CODE (type) == POINTER_TYPE
|
||
+ && CLASS_TYPE_P (TREE_TYPE (type)))
|
||
+ type = build_reference_type (type);
|
||
raises = tree_cons (NULL_TREE, type, raises);
|
||
}
|
||
|
||
@@ -956,24 +988,40 @@
|
||
static int
|
||
can_convert_eh (tree to, tree from)
|
||
{
|
||
- to = non_reference (to);
|
||
- from = non_reference (from);
|
||
+ bool to_ref = TREE_CODE (to) == REFERENCE_TYPE;
|
||
+ int depth = to_ref;
|
||
+ bool outer_const = true;
|
||
|
||
- if (TREE_CODE (to) == POINTER_TYPE && TREE_CODE (from) == POINTER_TYPE)
|
||
+ if (to_ref)
|
||
+ to = TREE_TYPE (to);
|
||
+ from = non_reference (from);
|
||
+
|
||
+ while (TREE_CODE (to) == POINTER_TYPE && TREE_CODE (from) == POINTER_TYPE)
|
||
{
|
||
+ unsigned to_quals, from_quals;
|
||
+
|
||
+ depth++;
|
||
+
|
||
to = TREE_TYPE (to);
|
||
from = TREE_TYPE (from);
|
||
+ to_quals = TYPE_QUALS (to);
|
||
+ from_quals = TYPE_QUALS (from);
|
||
|
||
- if (! at_least_as_qualified_p (to, from))
|
||
+ if ((from_quals & ~to_quals)
|
||
+ || (!outer_const && to_quals & ~from_quals))
|
||
return 0;
|
||
-
|
||
- if (TREE_CODE (to) == VOID_TYPE)
|
||
- return 1;
|
||
-
|
||
- /* Else fall through. */
|
||
+
|
||
+ if (!(to_quals & TYPE_QUAL_CONST))
|
||
+ outer_const = false;
|
||
}
|
||
|
||
- if (CLASS_TYPE_P (to) && CLASS_TYPE_P (from)
|
||
+ if (same_type_ignoring_top_level_qualifiers_p (from, to))
|
||
+ return 1;
|
||
+
|
||
+ if (depth == to_ref + 1 && TREE_CODE (to) == VOID_TYPE)
|
||
+ return 1;
|
||
+
|
||
+ if (depth < 2 && CLASS_TYPE_P (to) && CLASS_TYPE_P (from)
|
||
&& PUBLICLY_UNIQUELY_DERIVED_P (to, from))
|
||
return 1;
|
||
|
||
--- a/gcc/cp/parser.c
|
||
+++ b/gcc/cp/parser.c
|
||
@@ -309,8 +309,7 @@
|
||
|
||
/* Subsequent preprocessor diagnostics should use compiler
|
||
diagnostic functions to get the compiler source location. */
|
||
- cpp_get_options (parse_in)->client_diagnostic = true;
|
||
- cpp_get_callbacks (parse_in)->error = cp_cpp_error;
|
||
+ done_lexing = true;
|
||
|
||
gcc_assert (lexer->next_token->type != CPP_PURGED);
|
||
return lexer;
|
||
--- a/gcc/cp/rtti.c
|
||
+++ b/gcc/cp/rtti.c
|
||
@@ -393,6 +393,7 @@
|
||
return d;
|
||
}
|
||
|
||
+ gcc_assert (TREE_CODE (type) != REFERENCE_TYPE);
|
||
name = mangle_typeinfo_for_type (type);
|
||
|
||
d = IDENTIFIER_GLOBAL_VALUE (name);
|
||
--- a/gcc/cp/semantics.c
|
||
+++ b/gcc/cp/semantics.c
|
||
@@ -1120,7 +1120,11 @@
|
||
type = expand_start_catch_block (decl);
|
||
HANDLER_TYPE (handler) = type;
|
||
if (!processing_template_decl && type)
|
||
- mark_used (eh_type_info (type));
|
||
+ {
|
||
+ if (TREE_CODE (type) == REFERENCE_TYPE)
|
||
+ type = TREE_TYPE (type);
|
||
+ mark_used (eh_type_info (type));
|
||
+ }
|
||
}
|
||
|
||
/* Finish a handler, which may be given by HANDLER. The BLOCKs are
|
||
@@ -3243,8 +3247,10 @@
|
||
|
||
/* If the user wants us to keep all inline functions, then mark
|
||
this function as needed so that finish_file will make sure to
|
||
- output it later. */
|
||
- if (flag_keep_inline_functions && DECL_DECLARED_INLINE_P (fn))
|
||
+ output it later. Similarly, all dllexport'd functions must
|
||
+ be emitted; there may be callers in other DLLs. */
|
||
+ if ((flag_keep_inline_functions && DECL_DECLARED_INLINE_P (fn))
|
||
+ || lookup_attribute ("dllexport", DECL_ATTRIBUTES (fn)))
|
||
mark_needed (fn);
|
||
}
|
||
|
||
--- a/gcc/cp/typeck.c
|
||
+++ b/gcc/cp/typeck.c
|
||
@@ -1707,10 +1707,14 @@
|
||
tree
|
||
default_conversion (tree exp)
|
||
{
|
||
+ /* Check for target-specific promotions. */
|
||
+ tree promoted_type = targetm.promoted_type (TREE_TYPE (exp));
|
||
+ if (promoted_type)
|
||
+ exp = cp_convert (promoted_type, exp);
|
||
/* Perform the integral promotions first so that bitfield
|
||
expressions (which may promote to "int", even if the bitfield is
|
||
declared "unsigned") are promoted correctly. */
|
||
- if (INTEGRAL_OR_UNSCOPED_ENUMERATION_TYPE_P (TREE_TYPE (exp)))
|
||
+ else if (INTEGRAL_OR_UNSCOPED_ENUMERATION_TYPE_P (TREE_TYPE (exp)))
|
||
exp = perform_integral_promotions (exp);
|
||
/* Perform the other conversions. */
|
||
exp = decay_conversion (exp);
|
||
@@ -3378,7 +3382,6 @@
|
||
|
||
/* If an error was already reported for one of the arguments,
|
||
avoid reporting another error. */
|
||
-
|
||
if (code0 == ERROR_MARK || code1 == ERROR_MARK)
|
||
return error_mark_node;
|
||
|
||
@@ -3389,6 +3392,25 @@
|
||
return error_mark_node;
|
||
}
|
||
|
||
+ /* Issue warnings about peculiar, but valid, uses of NULL. */
|
||
+ if ((orig_op0 == null_node || orig_op1 == null_node)
|
||
+ /* It's reasonable to use pointer values as operands of &&
|
||
+ and ||, so NULL is no exception. */
|
||
+ && code != TRUTH_ANDIF_EXPR && code != TRUTH_ORIF_EXPR
|
||
+ && ( /* Both are NULL (or 0) and the operation was not a
|
||
+ comparison or a pointer subtraction. */
|
||
+ (null_ptr_cst_p (orig_op0) && null_ptr_cst_p (orig_op1)
|
||
+ && code != EQ_EXPR && code != NE_EXPR && code != MINUS_EXPR)
|
||
+ /* Or if one of OP0 or OP1 is neither a pointer nor NULL. */
|
||
+ || (!null_ptr_cst_p (orig_op0)
|
||
+ && !TYPE_PTR_P (type0) && !TYPE_PTR_TO_MEMBER_P (type0))
|
||
+ || (!null_ptr_cst_p (orig_op1)
|
||
+ && !TYPE_PTR_P (type1) && !TYPE_PTR_TO_MEMBER_P (type1)))
|
||
+ && (complain & tf_warning))
|
||
+ /* Some sort of arithmetic operation involving NULL was
|
||
+ performed. */
|
||
+ warning (OPT_Wpointer_arith, "NULL used in arithmetic");
|
||
+
|
||
switch (code)
|
||
{
|
||
case MINUS_EXPR:
|
||
@@ -3979,25 +4001,6 @@
|
||
}
|
||
}
|
||
|
||
- /* Issue warnings about peculiar, but valid, uses of NULL. */
|
||
- if ((orig_op0 == null_node || orig_op1 == null_node)
|
||
- /* It's reasonable to use pointer values as operands of &&
|
||
- and ||, so NULL is no exception. */
|
||
- && code != TRUTH_ANDIF_EXPR && code != TRUTH_ORIF_EXPR
|
||
- && ( /* Both are NULL (or 0) and the operation was not a comparison. */
|
||
- (null_ptr_cst_p (orig_op0) && null_ptr_cst_p (orig_op1)
|
||
- && code != EQ_EXPR && code != NE_EXPR)
|
||
- /* Or if one of OP0 or OP1 is neither a pointer nor NULL. */
|
||
- || (!null_ptr_cst_p (orig_op0) && TREE_CODE (TREE_TYPE (op0)) != POINTER_TYPE)
|
||
- || (!null_ptr_cst_p (orig_op1) && TREE_CODE (TREE_TYPE (op1)) != POINTER_TYPE))
|
||
- && (complain & tf_warning))
|
||
- /* Some sort of arithmetic operation involving NULL was
|
||
- performed. Note that pointer-difference and pointer-addition
|
||
- have already been handled above, and so we don't end up here in
|
||
- that case. */
|
||
- warning (OPT_Wpointer_arith, "NULL used in arithmetic");
|
||
-
|
||
-
|
||
/* If CONVERTED is zero, both args will be converted to type RESULT_TYPE.
|
||
Then the expression will be built.
|
||
It will be given type FINAL_TYPE if that is nonzero;
|
||
@@ -5024,6 +5027,12 @@
|
||
return rhs;
|
||
}
|
||
|
||
+ if (type_unknown_p (rhs))
|
||
+ {
|
||
+ error ("no context to resolve type of %qE", rhs);
|
||
+ return error_mark_node;
|
||
+ }
|
||
+
|
||
return build2 (COMPOUND_EXPR, TREE_TYPE (rhs), lhs, rhs);
|
||
}
|
||
|
||
--- a/gcc/c-ppoutput.c
|
||
+++ b/gcc/c-ppoutput.c
|
||
@@ -521,6 +521,7 @@
|
||
|
||
if (map != NULL)
|
||
{
|
||
+ input_location = map->start_location;
|
||
if (print.first_time)
|
||
{
|
||
/* Avoid printing foo.i when the main file is foo.c. */
|
||
--- a/gcc/cse.c
|
||
+++ b/gcc/cse.c
|
||
@@ -5754,6 +5754,11 @@
|
||
validate_change (object, &XEXP (x, i),
|
||
cse_process_notes (XEXP (x, i), object, changed), 0);
|
||
|
||
+ /* Rebuild a PLUS expression in canonical form if the first operand
|
||
+ ends up as a constant. */
|
||
+ if (code == PLUS && GET_CODE (XEXP (x, 0)) == CONST_INT)
|
||
+ return plus_constant (XEXP(x, 1), INTVAL (XEXP (x, 0)));
|
||
+
|
||
return x;
|
||
}
|
||
|
||
--- a/gcc/c-tree.h
|
||
+++ b/gcc/c-tree.h
|
||
@@ -647,4 +647,8 @@
|
||
extern void pedwarn_c90 (location_t, int opt, const char *, ...) ATTRIBUTE_GCC_CDIAG(3,4);
|
||
extern void pedwarn_c99 (location_t, int opt, const char *, ...) ATTRIBUTE_GCC_CDIAG(3,4);
|
||
|
||
+extern bool c_cpp_error (cpp_reader *, int, location_t, unsigned int,
|
||
+ const char *, va_list *)
|
||
+ ATTRIBUTE_GCC_CDIAG(5,0);
|
||
+
|
||
#endif /* ! GCC_C_TREE_H */
|
||
--- a/gcc/c-typeck.c
|
||
+++ b/gcc/c-typeck.c
|
||
@@ -1765,6 +1765,7 @@
|
||
tree orig_exp;
|
||
tree type = TREE_TYPE (exp);
|
||
enum tree_code code = TREE_CODE (type);
|
||
+ tree promoted_type;
|
||
|
||
/* Functions and arrays have been converted during parsing. */
|
||
gcc_assert (code != FUNCTION_TYPE);
|
||
@@ -1801,6 +1802,10 @@
|
||
if (exp == error_mark_node)
|
||
return error_mark_node;
|
||
|
||
+ promoted_type = targetm.promoted_type (type);
|
||
+ if (promoted_type)
|
||
+ return convert (promoted_type, exp);
|
||
+
|
||
if (INTEGRAL_TYPE_P (type))
|
||
return perform_integral_promotions (exp);
|
||
|
||
--- a/gcc/dbxout.c
|
||
+++ b/gcc/dbxout.c
|
||
@@ -376,6 +376,7 @@
|
||
dbxout_handle_pch, /* handle_pch */
|
||
debug_nothing_rtx, /* var_location */
|
||
debug_nothing_void, /* switch_text_section */
|
||
+ debug_nothing_tree_tree, /* set_name */
|
||
0 /* start_end_main_source_file */
|
||
};
|
||
#endif /* DBX_DEBUGGING_INFO */
|
||
@@ -408,6 +409,7 @@
|
||
dbxout_handle_pch, /* handle_pch */
|
||
debug_nothing_rtx, /* var_location */
|
||
debug_nothing_void, /* switch_text_section */
|
||
+ debug_nothing_tree_tree, /* set_name */
|
||
0 /* start_end_main_source_file */
|
||
};
|
||
#endif /* XCOFF_DEBUGGING_INFO */
|
||
--- a/gcc/debug.c
|
||
+++ b/gcc/debug.c
|
||
@@ -50,6 +50,7 @@
|
||
debug_nothing_int, /* handle_pch */
|
||
debug_nothing_rtx, /* var_location */
|
||
debug_nothing_void, /* switch_text_section */
|
||
+ debug_nothing_tree_tree, /* set_name */
|
||
0 /* start_end_main_source_file */
|
||
};
|
||
|
||
@@ -67,6 +68,12 @@
|
||
}
|
||
|
||
void
|
||
+debug_nothing_tree_tree (tree t1 ATTRIBUTE_UNUSED,
|
||
+ tree t2 ATTRIBUTE_UNUSED)
|
||
+{
|
||
+}
|
||
+
|
||
+void
|
||
debug_nothing_tree_tree_tree_bool (tree t1 ATTRIBUTE_UNUSED,
|
||
tree t2 ATTRIBUTE_UNUSED,
|
||
tree t3 ATTRIBUTE_UNUSED,
|
||
--- a/gcc/debug.h
|
||
+++ b/gcc/debug.h
|
||
@@ -126,6 +126,10 @@
|
||
text sections. */
|
||
void (* switch_text_section) (void);
|
||
|
||
+ /* Called from grokdeclarator. Replaces the anonymous name with the
|
||
+ type name. */
|
||
+ void (* set_name) (tree, tree);
|
||
+
|
||
/* This is 1 if the debug writer wants to see start and end commands for the
|
||
main source files, and 0 otherwise. */
|
||
int start_end_main_source_file;
|
||
@@ -140,6 +144,7 @@
|
||
extern void debug_nothing_int (unsigned int);
|
||
extern void debug_nothing_int_int (unsigned int, unsigned int);
|
||
extern void debug_nothing_tree (tree);
|
||
+extern void debug_nothing_tree_tree (tree, tree);
|
||
extern void debug_nothing_tree_int (tree, int);
|
||
extern void debug_nothing_tree_tree_tree_bool (tree, tree, tree, bool);
|
||
extern bool debug_true_const_tree (const_tree);
|
||
--- a/gcc/defaults.h
|
||
+++ b/gcc/defaults.h
|
||
@@ -902,7 +902,8 @@
|
||
|
||
/* On most machines, the CFA coincides with the first incoming parm. */
|
||
#ifndef ARG_POINTER_CFA_OFFSET
|
||
-#define ARG_POINTER_CFA_OFFSET(FNDECL) FIRST_PARM_OFFSET (FNDECL)
|
||
+#define ARG_POINTER_CFA_OFFSET(FNDECL) \
|
||
+ (FIRST_PARM_OFFSET (FNDECL) + crtl->args.pretend_args_size)
|
||
#endif
|
||
|
||
/* On most machines, we use the CFA as DW_AT_frame_base. */
|
||
--- a/gcc/diagnostic.c
|
||
+++ b/gcc/diagnostic.c
|
||
@@ -126,6 +126,7 @@
|
||
diagnostic->message.args_ptr = args;
|
||
diagnostic->message.format_spec = msg;
|
||
diagnostic->location = location;
|
||
+ diagnostic->override_column = 0;
|
||
diagnostic->kind = kind;
|
||
diagnostic->option_index = 0;
|
||
}
|
||
@@ -153,6 +154,8 @@
|
||
};
|
||
const char *text = _(diagnostic_kind_text[diagnostic->kind]);
|
||
expanded_location s = expand_location (diagnostic->location);
|
||
+ if (diagnostic->override_column)
|
||
+ s.column = diagnostic->override_column;
|
||
gcc_assert (diagnostic->kind < DK_LAST_DIAGNOSTIC_KIND);
|
||
|
||
return
|
||
--- a/gcc/diagnostic.h
|
||
+++ b/gcc/diagnostic.h
|
||
@@ -41,6 +41,7 @@
|
||
{
|
||
text_info message;
|
||
location_t location;
|
||
+ unsigned int override_column;
|
||
/* TREE_BLOCK if the diagnostic is to be reported in some inline
|
||
function inlined into other function, otherwise NULL. */
|
||
tree abstract_origin;
|
||
@@ -185,6 +186,10 @@
|
||
|
||
#define report_diagnostic(D) diagnostic_report_diagnostic (global_dc, D)
|
||
|
||
+/* Override the column number to be used for reporting a
|
||
+ diagnostic. */
|
||
+#define diagnostic_override_column(DI, COL) (DI)->override_column = (COL)
|
||
+
|
||
/* Diagnostic related functions. */
|
||
extern void diagnostic_initialize (diagnostic_context *);
|
||
extern void diagnostic_report_current_module (diagnostic_context *);
|
||
--- a/gcc/dwarf2out.c
|
||
+++ b/gcc/dwarf2out.c
|
||
@@ -2474,6 +2474,12 @@
|
||
insn = PATTERN (insn);
|
||
|
||
dwarf2out_frame_debug_expr (insn, label);
|
||
+
|
||
+ /* Check again. A parallel can save and update the same register.
|
||
+ We could probably check just once, here, but this is safer than
|
||
+ removing the check above. */
|
||
+ if (clobbers_queued_reg_save (insn))
|
||
+ flush_queued_reg_saves ();
|
||
}
|
||
|
||
#endif
|
||
@@ -4598,6 +4604,7 @@
|
||
static void dwarf2out_abstract_function (tree);
|
||
static void dwarf2out_var_location (rtx);
|
||
static void dwarf2out_begin_function (tree);
|
||
+static void dwarf2out_set_name (tree, tree);
|
||
|
||
/* The debug hooks structure. */
|
||
|
||
@@ -4631,6 +4638,7 @@
|
||
debug_nothing_int, /* handle_pch */
|
||
dwarf2out_var_location,
|
||
dwarf2out_switch_text_section,
|
||
+ dwarf2out_set_name,
|
||
1 /* start_end_main_source_file */
|
||
};
|
||
#endif
|
||
@@ -5975,12 +5983,9 @@
|
||
(const char *)x2) == 0;
|
||
}
|
||
|
||
-/* Add a string attribute value to a DIE. */
|
||
-
|
||
-static inline void
|
||
-add_AT_string (dw_die_ref die, enum dwarf_attribute attr_kind, const char *str)
|
||
+static struct indirect_string_node *
|
||
+find_AT_string (const char *str)
|
||
{
|
||
- dw_attr_node attr;
|
||
struct indirect_string_node *node;
|
||
void **slot;
|
||
|
||
@@ -6001,6 +6006,18 @@
|
||
node = (struct indirect_string_node *) *slot;
|
||
|
||
node->refcount++;
|
||
+ return node;
|
||
+}
|
||
+
|
||
+/* Add a string attribute value to a DIE. */
|
||
+
|
||
+static inline void
|
||
+add_AT_string (dw_die_ref die, enum dwarf_attribute attr_kind, const char *str)
|
||
+{
|
||
+ dw_attr_node attr;
|
||
+ struct indirect_string_node *node;
|
||
+
|
||
+ node = find_AT_string (str);
|
||
|
||
attr.dw_attr = attr_kind;
|
||
attr.dw_attr_val.val_class = dw_val_class_str;
|
||
@@ -6637,6 +6654,8 @@
|
||
static inline var_loc_list *
|
||
lookup_decl_loc (const_tree decl)
|
||
{
|
||
+ if (!decl_loc_table)
|
||
+ return NULL;
|
||
return (var_loc_list *)
|
||
htab_find_with_hash (decl_loc_table, decl, DECL_UID (decl));
|
||
}
|
||
@@ -13471,6 +13490,7 @@
|
||
tree save_fn;
|
||
tree context;
|
||
int was_abstract = DECL_ABSTRACT (decl);
|
||
+ htab_t old_decl_loc_table;
|
||
|
||
/* Make sure we have the actual abstract inline, not a clone. */
|
||
decl = DECL_ORIGIN (decl);
|
||
@@ -13480,6 +13500,12 @@
|
||
/* We've already generated the abstract instance. */
|
||
return;
|
||
|
||
+ /* We can be called recursively when seeing a block that defines an inlined
+ subroutine DIE. Be sure not to clobber the outer location table, and not
+ to use it, or we would get locations in abstract instances. */
|
||
+ old_decl_loc_table = decl_loc_table;
|
||
+ decl_loc_table = NULL;
|
||
+
|
||
/* Be sure we've emitted the in-class declaration DIE (if any) first, so
|
||
we don't get confused by DECL_ABSTRACT. */
|
||
if (debug_info_level > DINFO_LEVEL_TERSE)
|
||
@@ -13501,6 +13527,7 @@
|
||
set_decl_abstract_flags (decl, 0);
|
||
|
||
current_function_decl = save_fn;
|
||
+ decl_loc_table = old_decl_loc_table;
|
||
pop_cfun ();
|
||
}
|
||
|
||
@@ -15796,6 +15823,31 @@
|
||
return fd->emitted_number;
|
||
}
|
||
|
||
+/* Replace DW_AT_name for the decl with name. */
|
||
+
|
||
+static void
|
||
+dwarf2out_set_name (tree decl, tree name)
|
||
+{
|
||
+ dw_die_ref die;
|
||
+ dw_attr_ref attr;
|
||
+
|
||
+ die = TYPE_SYMTAB_DIE (decl);
|
||
+ if (!die)
|
||
+ return;
|
||
+
|
||
+ attr = get_AT (die, DW_AT_name);
|
||
+ if (attr)
|
||
+ {
|
||
+ struct indirect_string_node *node;
|
||
+
|
||
+ node = find_AT_string (dwarf2_name (name, 0));
|
||
+ /* Replace the string. */
|
||
+ attr->dw_attr_val.v.val_str = node;
|
||
+ }
|
||
+
|
||
+ else
|
||
+ add_name_attribute (die, dwarf2_name (name, 0));
|
||
+}
|
||
/* Called by the final INSN scan whenever we see a var location. We
|
||
use it to drop labels in the right places, and throw the location in
|
||
our lookup table. */
|
||
--- a/gcc/except.c
|
||
+++ b/gcc/except.c
|
||
@@ -3567,6 +3567,12 @@
|
||
paths below go through assemble_integer, which would take
|
||
care of this for us. */
|
||
STRIP_NOPS (type);
|
||
+ if (TREE_CODE (type) == POINTER_PLUS_EXPR)
|
||
+ {
|
||
+ gcc_assert (TREE_CODE (TREE_OPERAND (type, 1)) == INTEGER_CST);
|
||
+ type = TREE_OPERAND (type, 0);
|
||
+ STRIP_NOPS (type);
|
||
+ }
|
||
if (TREE_CODE (type) == ADDR_EXPR)
|
||
{
|
||
type = TREE_OPERAND (type, 0);
|
||
--- a/gcc/explow.c
|
||
+++ b/gcc/explow.c
|
||
@@ -1491,9 +1491,9 @@
|
||
in which a scalar value of mode MODE was returned by a library call. */
|
||
|
||
rtx
|
||
-hard_libcall_value (enum machine_mode mode)
|
||
+hard_libcall_value (enum machine_mode mode, rtx fun)
|
||
{
|
||
- return LIBCALL_VALUE (mode);
|
||
+ return targetm.calls.libcall_value (mode, fun);
|
||
}
|
||
|
||
/* Look up the tree code for a given rtx code
|
||
--- a/gcc/expmed.c
|
||
+++ b/gcc/expmed.c
|
||
@@ -103,7 +103,8 @@
|
||
static int neg_cost[2][NUM_MACHINE_MODES];
|
||
static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
|
||
static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
|
||
-static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
|
||
+static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
|
||
+static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
|
||
static int mul_cost[2][NUM_MACHINE_MODES];
|
||
static int sdiv_cost[2][NUM_MACHINE_MODES];
|
||
static int udiv_cost[2][NUM_MACHINE_MODES];
|
||
@@ -130,7 +131,8 @@
|
||
struct rtx_def shift; rtunion shift_fld1;
|
||
struct rtx_def shift_mult; rtunion shift_mult_fld1;
|
||
struct rtx_def shift_add; rtunion shift_add_fld1;
|
||
- struct rtx_def shift_sub; rtunion shift_sub_fld1;
|
||
+ struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
|
||
+ struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
|
||
} all;
|
||
|
||
rtx pow2[MAX_BITS_PER_WORD];
|
||
@@ -201,9 +203,13 @@
|
||
XEXP (&all.shift_add, 0) = &all.shift_mult;
|
||
XEXP (&all.shift_add, 1) = &all.reg;
|
||
|
||
- PUT_CODE (&all.shift_sub, MINUS);
|
||
- XEXP (&all.shift_sub, 0) = &all.shift_mult;
|
||
- XEXP (&all.shift_sub, 1) = &all.reg;
|
||
+ PUT_CODE (&all.shift_sub0, MINUS);
|
||
+ XEXP (&all.shift_sub0, 0) = &all.shift_mult;
|
||
+ XEXP (&all.shift_sub0, 1) = &all.reg;
|
||
+
|
||
+ PUT_CODE (&all.shift_sub1, MINUS);
|
||
+ XEXP (&all.shift_sub1, 0) = &all.reg;
|
||
+ XEXP (&all.shift_sub1, 1) = &all.shift_mult;
|
||
|
||
for (speed = 0; speed < 2; speed++)
|
||
{
|
||
@@ -226,7 +232,8 @@
|
||
PUT_MODE (&all.shift, mode);
|
||
PUT_MODE (&all.shift_mult, mode);
|
||
PUT_MODE (&all.shift_add, mode);
|
||
- PUT_MODE (&all.shift_sub, mode);
|
||
+ PUT_MODE (&all.shift_sub0, mode);
|
||
+ PUT_MODE (&all.shift_sub1, mode);
|
||
|
||
add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
|
||
neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
|
||
@@ -254,8 +261,8 @@
|
||
}
|
||
|
||
shift_cost[speed][mode][0] = 0;
|
||
- shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
|
||
- = add_cost[speed][mode];
|
||
+ shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
|
||
+ = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
|
||
|
||
n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
|
||
for (m = 1; m < n; m++)
|
||
@@ -265,7 +272,8 @@
|
||
|
||
shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
|
||
shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
|
||
- shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
|
||
+ shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
|
||
+ shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
|
||
}
|
||
}
|
||
}
|
||
@@ -2397,6 +2405,7 @@
|
||
struct mult_cost best_cost;
|
||
struct mult_cost new_limit;
|
||
int op_cost, op_latency;
|
||
+ unsigned HOST_WIDE_INT orig_t = t;
|
||
unsigned HOST_WIDE_INT q;
|
||
int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
|
||
int hash_index;
|
||
@@ -2542,6 +2551,38 @@
|
||
best_alg->log[best_alg->ops] = m;
|
||
best_alg->op[best_alg->ops] = alg_shift;
|
||
}
|
||
+
|
||
+ /* See if treating ORIG_T as a signed number yields a better
|
||
+ sequence. Try this sequence only for a negative ORIG_T
|
||
+ as it would be useless for a non-negative ORIG_T. */
|
||
+ if ((HOST_WIDE_INT) orig_t < 0)
|
||
+ {
|
||
+ /* Shift ORIG_T as follows because a right shift of a
|
||
+ negative-valued signed type is implementation
|
||
+ defined. */
|
||
+ q = ~(~orig_t >> m);
|
||
+ /* The function expand_shift will choose between a shift
|
||
+ and a sequence of additions, so the observed cost is
|
||
+ given as MIN (m * add_cost[speed][mode],
|
||
+ shift_cost[speed][mode][m]). */
|
||
+ op_cost = m * add_cost[speed][mode];
|
||
+ if (shift_cost[speed][mode][m] < op_cost)
|
||
+ op_cost = shift_cost[speed][mode][m];
|
||
+ new_limit.cost = best_cost.cost - op_cost;
|
||
+ new_limit.latency = best_cost.latency - op_cost;
|
||
+ synth_mult (alg_in, q, &new_limit, mode);
|
||
+
|
||
+ alg_in->cost.cost += op_cost;
|
||
+ alg_in->cost.latency += op_cost;
|
||
+ if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
|
||
+ {
|
||
+ struct algorithm *x;
|
||
+ best_cost = alg_in->cost;
|
||
+ x = alg_in, alg_in = best_alg, best_alg = x;
|
||
+ best_alg->log[best_alg->ops] = m;
|
||
+ best_alg->op[best_alg->ops] = alg_shift;
|
||
+ }
|
||
+ }
|
||
}
|
||
if (cache_hit)
|
||
goto done;
|
||
@@ -2604,6 +2645,29 @@
|
||
best_alg->op[best_alg->ops] = alg_add_t_m2;
|
||
}
|
||
}
|
||
+
|
||
+ /* We may be able to calculate a * -7, a * -15, a * -31, etc.
+ quickly with a - a * n for some appropriate constant n. */
|
||
+ m = exact_log2 (-orig_t + 1);
|
||
+ if (m >= 0 && m < maxm)
|
||
+ {
|
||
+ op_cost = shiftsub1_cost[speed][mode][m];
|
||
+ new_limit.cost = best_cost.cost - op_cost;
|
||
+ new_limit.latency = best_cost.latency - op_cost;
|
||
+ synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
|
||
+
|
||
+ alg_in->cost.cost += op_cost;
|
||
+ alg_in->cost.latency += op_cost;
|
||
+ if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
|
||
+ {
|
||
+ struct algorithm *x;
|
||
+ best_cost = alg_in->cost;
|
||
+ x = alg_in, alg_in = best_alg, best_alg = x;
|
||
+ best_alg->log[best_alg->ops] = m;
|
||
+ best_alg->op[best_alg->ops] = alg_sub_t_m2;
|
||
+ }
|
||
+ }
|
||
+
|
||
if (cache_hit)
|
||
goto done;
|
||
}
|
||
@@ -2673,9 +2737,9 @@
|
||
hardware the shift may be executed concurrently with the
|
||
earlier steps in the algorithm. */
|
||
op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
|
||
- if (shiftsub_cost[speed][mode][m] < op_cost)
|
||
+ if (shiftsub0_cost[speed][mode][m] < op_cost)
|
||
{
|
||
- op_cost = shiftsub_cost[speed][mode][m];
|
||
+ op_cost = shiftsub0_cost[speed][mode][m];
|
||
op_latency = op_cost;
|
||
}
|
||
else
|
||
@@ -2738,7 +2802,7 @@
|
||
m = exact_log2 (q);
|
||
if (m >= 0 && m < maxm)
|
||
{
|
||
- op_cost = shiftsub_cost[speed][mode][m];
|
||
+ op_cost = shiftsub0_cost[speed][mode][m];
|
||
new_limit.cost = best_cost.cost - op_cost;
|
||
new_limit.latency = best_cost.latency - op_cost;
|
||
synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
|
||
--- a/gcc/expr.c
|
||
+++ b/gcc/expr.c
|
||
@@ -4391,6 +4391,29 @@
|
||
|
||
/* Compute FROM and store the value in the rtx we got. */
|
||
|
||
+ if (TREE_CODE (to) == MISALIGNED_INDIRECT_REF)
|
||
+ {
|
||
+ rtx insn;
|
||
+ rtx from_rtx;
|
||
+ enum insn_code icode;
|
||
+ enum machine_mode mode = GET_MODE (to_rtx);
|
||
+
|
||
+ icode = optab_handler (movmisalign_optab, mode)->insn_code;
|
||
+ gcc_assert (icode != CODE_FOR_nothing);
|
||
+
|
||
+ from_rtx = expand_expr (from, NULL_RTX, mode, EXPAND_NORMAL);
|
||
+ insn = GEN_FCN (icode) (to_rtx, from_rtx);
|
||
+ /* If that failed then force the source into a reg and try again. */
|
||
+ if (!insn)
|
||
+ {
|
||
+ from_rtx = copy_to_mode_reg(mode, from_rtx);
|
||
+ insn = GEN_FCN (icode) (to_rtx, from_rtx);
|
||
+ gcc_assert(insn);
|
||
+ }
|
||
+ emit_insn (insn);
|
||
+ return;
|
||
+ }
|
||
+
|
||
push_temp_slots ();
|
||
result = store_expr (from, to_rtx, 0, nontemporal);
|
||
preserve_temp_slots (result);
|
||
@@ -7291,6 +7314,19 @@
|
||
decl_rtl = DECL_RTL (exp);
|
||
gcc_assert (decl_rtl);
|
||
decl_rtl = copy_rtx (decl_rtl);
|
||
+ /* Record writes to register variables. */
|
||
+ if (modifier == EXPAND_WRITE && REG_P(decl_rtl)
|
||
+ && REGNO(decl_rtl) < FIRST_PSEUDO_REGISTER)
|
||
+ {
|
||
+ int i = REGNO(decl_rtl);
|
||
+ int nregs = hard_regno_nregs[i][GET_MODE(decl_rtl)];
|
||
+ while (nregs)
|
||
+ {
|
||
+ SET_HARD_REG_BIT(crtl->asm_clobbers, i);
|
||
+ i++;
|
||
+ nregs--;
|
||
+ }
|
||
+ }
|
||
|
||
/* Ensure variable marked as used even if it doesn't go through
|
||
a parser. If it hasn't be used yet, write out an external
|
||
@@ -7538,14 +7574,15 @@
|
||
|
||
/* Resolve the misalignment now, so that we don't have to remember
|
||
to resolve it later. Of course, this only works for reads. */
|
||
- /* ??? When we get around to supporting writes, we'll have to handle
|
||
- this in store_expr directly. The vectorizer isn't generating
|
||
- those yet, however. */
|
||
if (code == MISALIGNED_INDIRECT_REF)
|
||
{
|
||
int icode;
|
||
rtx reg, insn;
|
||
|
||
+ /* For writes produce a MEM, and expand_assignment will DTRT. */
|
||
+ if (modifier == EXPAND_WRITE)
|
||
+ return temp;
|
||
+
|
||
gcc_assert (modifier == EXPAND_NORMAL
|
||
|| modifier == EXPAND_STACK_PARM);
|
||
|
||
--- a/gcc/expr.h
|
||
+++ b/gcc/expr.h
|
||
@@ -757,7 +757,7 @@
|
||
|
||
/* Return an rtx that refers to the value returned by a library call
|
||
in its original home. This becomes invalid if any more code is emitted. */
|
||
-extern rtx hard_libcall_value (enum machine_mode);
|
||
+extern rtx hard_libcall_value (enum machine_mode, rtx);
|
||
|
||
/* Return the mode desired by operand N of a particular bitfield
|
||
insert/extract insn, or MAX_MACHINE_MODE if no such insn is
|
||
--- a/gcc/final.c
|
||
+++ b/gcc/final.c
|
||
@@ -891,6 +891,7 @@
|
||
if (LABEL_P (insn))
|
||
{
|
||
rtx next;
|
||
+ bool next_is_jumptable;
|
||
|
||
/* Merge in alignments computed by compute_alignments. */
|
||
log = LABEL_TO_ALIGNMENT (insn);
|
||
@@ -900,31 +901,30 @@
|
||
max_skip = LABEL_TO_MAX_SKIP (insn);
|
||
}
|
||
|
||
- log = LABEL_ALIGN (insn);
|
||
- if (max_log < log)
|
||
+ next = next_nonnote_insn (insn);
|
||
+ next_is_jumptable = next && JUMP_TABLE_DATA_P (next);
|
||
+ if (!next_is_jumptable)
|
||
{
|
||
- max_log = log;
|
||
- max_skip = LABEL_ALIGN_MAX_SKIP;
|
||
+ log = LABEL_ALIGN (insn);
|
||
+ if (max_log < log)
|
||
+ {
|
||
+ max_log = log;
|
||
+ max_skip = LABEL_ALIGN_MAX_SKIP;
|
||
+ }
|
||
}
|
||
- next = next_nonnote_insn (insn);
|
||
/* ADDR_VECs only take room if read-only data goes into the text
|
||
section. */
|
||
- if (JUMP_TABLES_IN_TEXT_SECTION
|
||
- || readonly_data_section == text_section)
|
||
- if (next && JUMP_P (next))
|
||
- {
|
||
- rtx nextbody = PATTERN (next);
|
||
- if (GET_CODE (nextbody) == ADDR_VEC
|
||
- || GET_CODE (nextbody) == ADDR_DIFF_VEC)
|
||
- {
|
||
- log = ADDR_VEC_ALIGN (next);
|
||
- if (max_log < log)
|
||
- {
|
||
- max_log = log;
|
||
- max_skip = LABEL_ALIGN_MAX_SKIP;
|
||
- }
|
||
- }
|
||
- }
|
||
+ if ((JUMP_TABLES_IN_TEXT_SECTION
|
||
+ || readonly_data_section == text_section)
|
||
+ && next_is_jumptable)
|
||
+ {
|
||
+ log = ADDR_VEC_ALIGN (next);
|
||
+ if (max_log < log)
|
||
+ {
|
||
+ max_log = log;
|
||
+ max_skip = LABEL_ALIGN_MAX_SKIP;
|
||
+ }
|
||
+ }
|
||
LABEL_TO_ALIGNMENT (insn) = max_log;
|
||
LABEL_TO_MAX_SKIP (insn) = max_skip;
|
||
max_log = 0;
|
||
@@ -2013,48 +2013,41 @@
|
||
app_disable ();
|
||
|
||
next = next_nonnote_insn (insn);
|
||
- if (next != 0 && JUMP_P (next))
|
||
+ /* If this label is followed by a jump-table, make sure we put
|
||
+ the label in the read-only section. Also possibly write the
|
||
+ label and jump table together. */
|
||
+ if (next != 0 && JUMP_TABLE_DATA_P (next))
|
||
{
|
||
- rtx nextbody = PATTERN (next);
|
||
-
|
||
- /* If this label is followed by a jump-table,
|
||
- make sure we put the label in the read-only section. Also
|
||
- possibly write the label and jump table together. */
|
||
-
|
||
- if (GET_CODE (nextbody) == ADDR_VEC
|
||
- || GET_CODE (nextbody) == ADDR_DIFF_VEC)
|
||
- {
|
||
#if defined(ASM_OUTPUT_ADDR_VEC) || defined(ASM_OUTPUT_ADDR_DIFF_VEC)
|
||
- /* In this case, the case vector is being moved by the
|
||
- target, so don't output the label at all. Leave that
|
||
- to the back end macros. */
|
||
+ /* In this case, the case vector is being moved by the
|
||
+ target, so don't output the label at all. Leave that
|
||
+ to the back end macros. */
|
||
#else
|
||
- if (! JUMP_TABLES_IN_TEXT_SECTION)
|
||
- {
|
||
- int log_align;
|
||
+ if (! JUMP_TABLES_IN_TEXT_SECTION)
|
||
+ {
|
||
+ int log_align;
|
||
|
||
- switch_to_section (targetm.asm_out.function_rodata_section
|
||
- (current_function_decl));
|
||
+ switch_to_section (targetm.asm_out.function_rodata_section
|
||
+ (current_function_decl));
|
||
|
||
#ifdef ADDR_VEC_ALIGN
|
||
- log_align = ADDR_VEC_ALIGN (next);
|
||
+ log_align = ADDR_VEC_ALIGN (next);
|
||
#else
|
||
- log_align = exact_log2 (BIGGEST_ALIGNMENT / BITS_PER_UNIT);
|
||
+ log_align = exact_log2 (BIGGEST_ALIGNMENT / BITS_PER_UNIT);
|
||
#endif
|
||
- ASM_OUTPUT_ALIGN (file, log_align);
|
||
- }
|
||
- else
|
||
- switch_to_section (current_function_section ());
|
||
+ ASM_OUTPUT_ALIGN (file, log_align);
|
||
+ }
|
||
+ else
|
||
+ switch_to_section (current_function_section ());
|
||
|
||
#ifdef ASM_OUTPUT_CASE_LABEL
|
||
- ASM_OUTPUT_CASE_LABEL (file, "L", CODE_LABEL_NUMBER (insn),
|
||
- next);
|
||
+ ASM_OUTPUT_CASE_LABEL (file, "L", CODE_LABEL_NUMBER (insn),
|
||
+ next);
|
||
#else
|
||
- targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (insn));
|
||
+ targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (insn));
|
||
#endif
|
||
#endif
|
||
- break;
|
||
- }
|
||
+ break;
|
||
}
|
||
if (LABEL_ALT_ENTRY_P (insn))
|
||
output_alternate_entry_point (file, insn);
|
||
--- a/gcc/fold-const.c
|
||
+++ b/gcc/fold-const.c
|
||
@@ -2289,7 +2289,24 @@
|
||
real_convert (&value, TYPE_MODE (type), &TREE_REAL_CST (arg1));
|
||
t = build_real (type, value);
|
||
|
||
- TREE_OVERFLOW (t) = TREE_OVERFLOW (arg1);
|
||
+ /* If converting an infinity or NAN to a representation that doesn't
|
||
+ have one, set the overflow bit so that we can produce some kind of
|
||
+ error message at the appropriate point if necessary. It's not the
|
||
+ most user-friendly message, but it's better than nothing. */
|
||
+ if (REAL_VALUE_ISINF (TREE_REAL_CST (arg1))
|
||
+ && !MODE_HAS_INFINITIES (TYPE_MODE (type)))
|
||
+ TREE_OVERFLOW (t) = 1;
|
||
+ else if (REAL_VALUE_ISNAN (TREE_REAL_CST (arg1))
|
||
+ && !MODE_HAS_NANS (TYPE_MODE (type)))
|
||
+ TREE_OVERFLOW (t) = 1;
|
||
+ /* Regular overflow, conversion produced an infinity in a mode that
|
||
+ can't represent them. */
|
||
+ else if (!MODE_HAS_INFINITIES (TYPE_MODE (type))
|
||
+ && REAL_VALUE_ISINF (value)
|
||
+ && !REAL_VALUE_ISINF (TREE_REAL_CST (arg1)))
|
||
+ TREE_OVERFLOW (t) = 1;
|
||
+ else
|
||
+ TREE_OVERFLOW (t) = TREE_OVERFLOW (arg1);
|
||
return t;
|
||
}
|
||
|
||
--- a/gcc/fortran/cpp.c
|
||
+++ b/gcc/fortran/cpp.c
|
||
@@ -137,6 +137,9 @@
|
||
static void cb_ident (cpp_reader *, source_location, const cpp_string *);
|
||
static void cb_used_define (cpp_reader *, source_location, cpp_hashnode *);
|
||
static void cb_used_undef (cpp_reader *, source_location, cpp_hashnode *);
|
||
+static bool cb_cpp_error (cpp_reader *, int, location_t, unsigned int,
|
||
+ const char *, va_list *)
|
||
+ ATTRIBUTE_GCC_DIAG(5,0);
|
||
void pp_dir_change (cpp_reader *, const char *);
|
||
|
||
static int dump_macro (cpp_reader *, cpp_hashnode *, void *);
|
||
@@ -452,7 +455,6 @@
|
||
cpp_option->cplusplus_comments = 0;
|
||
|
||
cpp_option->pedantic = pedantic;
|
||
- cpp_option->inhibit_warnings = inhibit_warnings;
|
||
|
||
cpp_option->dollars_in_ident = gfc_option.flag_dollar_ok;
|
||
cpp_option->discard_comments = gfc_cpp_option.discard_comments;
|
||
@@ -465,9 +467,6 @@
|
||
|
||
cpp_post_options (cpp_in);
|
||
|
||
- /* If an error has occurred in cpplib, note it so we fail immediately. */
|
||
- errorcount += cpp_errors (cpp_in);
|
||
-
|
||
gfc_cpp_register_include_paths ();
|
||
}
|
||
|
||
@@ -482,6 +481,7 @@
|
||
cb->line_change = cb_line_change;
|
||
cb->ident = cb_ident;
|
||
cb->def_pragma = cb_def_pragma;
|
||
+ cb->error = cb_cpp_error;
|
||
|
||
if (gfc_cpp_option.dump_includes)
|
||
cb->include = cb_include;
|
||
@@ -961,6 +961,57 @@
|
||
cpp_define_queue = q;
|
||
}
|
||
|
||
+/* Callback from cpp_error for PFILE to print diagnostics from the
|
||
+ preprocessor. The diagnostic is of type LEVEL, at location
|
||
+ LOCATION, with column number possibly overridden by COLUMN_OVERRIDE
|
||
+ if not zero; MSG is the translated message and AP the arguments.
|
||
+ Returns true if a diagnostic was emitted, false otherwise. */
|
||
+
|
||
+static bool
|
||
+cb_cpp_error (cpp_reader *pfile ATTRIBUTE_UNUSED, int level,
|
||
+ location_t location, unsigned int column_override,
|
||
+ const char *msg, va_list *ap)
|
||
+{
|
||
+ diagnostic_info diagnostic;
|
||
+ diagnostic_t dlevel;
|
||
+ int save_warn_system_headers = warn_system_headers;
|
||
+ bool ret;
|
||
+
|
||
+ switch (level)
|
||
+ {
|
||
+ case CPP_DL_WARNING_SYSHDR:
|
||
+ warn_system_headers = 1;
|
||
+ /* Fall through. */
|
||
+ case CPP_DL_WARNING:
|
||
+ dlevel = DK_WARNING;
|
||
+ break;
|
||
+ case CPP_DL_PEDWARN:
|
||
+ dlevel = DK_PEDWARN;
|
||
+ break;
|
||
+ case CPP_DL_ERROR:
|
||
+ dlevel = DK_ERROR;
|
||
+ break;
|
||
+ case CPP_DL_ICE:
|
||
+ dlevel = DK_ICE;
|
||
+ break;
|
||
+ case CPP_DL_NOTE:
|
||
+ dlevel = DK_NOTE;
|
||
+ break;
|
||
+ case CPP_DL_FATAL:
|
||
+ dlevel = DK_FATAL;
|
||
+ break;
|
||
+ default:
|
||
+ gcc_unreachable ();
|
||
+ }
|
||
+ diagnostic_set_info_translated (&diagnostic, msg, ap,
|
||
+ location, dlevel);
|
||
+ if (column_override)
|
||
+ diagnostic_override_column (&diagnostic, column_override);
|
||
+ ret = report_diagnostic (&diagnostic);
|
||
+ if (level == CPP_DL_WARNING_SYSHDR)
|
||
+ warn_system_headers = save_warn_system_headers;
|
||
+ return ret;
|
||
+}
|
||
|
||
/* Callback called when -fworking-director and -E to emit working
|
||
directory in cpp output file. */
|
||
--- a/gcc/function.c
|
||
+++ b/gcc/function.c
|
||
@@ -272,7 +272,10 @@
|
||
if (! type)
|
||
type = lang_hooks.types.type_for_mode (mode, 0);
|
||
|
||
- return STACK_SLOT_ALIGNMENT (type, mode, alignment);
|
||
+ return alignment_for_aligned_arrays (type,
|
||
+ STACK_SLOT_ALIGNMENT (type,
|
||
+ mode,
|
||
+ alignment));
|
||
}
|
||
|
||
/* Allocate a stack slot of SIZE bytes and return a MEM rtx for it
|
||
@@ -5359,6 +5362,57 @@
|
||
{
|
||
return IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (cfun->decl));
|
||
}
|
||
+
|
||
+/* This function adjusts alignments as appropriate according to the
|
||
+ setting of -falign-arrays. If that is specified then the minimum
|
||
+ alignment for array variables is set to be the largest power of two
|
||
+ less than or equal to their total storage size, or the biggest
|
||
+ alignment used on the machine, whichever is smaller. */
|
||
+
|
||
+unsigned int
|
||
+alignment_for_aligned_arrays (tree ty, unsigned int existing_alignment)
|
||
+{
|
||
+ unsigned int min_alignment;
|
||
+ tree size;
|
||
+
|
||
+ /* Return the existing alignment if not using -falign-arrays or if
|
||
+ the type is not an array type. */
|
||
+ if (!flag_align_arrays || !ty || TREE_CODE (ty) != ARRAY_TYPE)
|
||
+ return existing_alignment;
|
||
+
|
||
+ /* Extract the total storage size of the array in bits. */
|
||
+ size = TYPE_SIZE (ty);
|
||
+ gcc_assert (size);
|
||
+
|
||
+ /* At least for variable-length arrays, TREE_CODE (size) might not be an
|
||
+ integer constant; check it now. If it is not, give the array at
|
||
+ least BIGGEST_ALIGNMENT just to be safe. Furthermore, we assume that
|
||
+ alignments always fit into a host integer. So if we can't fit the
|
||
+ size of the array in bits into a host integer, it must also be large
|
||
+ enough to deserve at least BIGGEST_ALIGNMENT (see below). */
|
||
+ if (TREE_CODE (size) != INTEGER_CST || !host_integerp (size, 1))
|
||
+ min_alignment = BIGGEST_ALIGNMENT;
|
||
+ else
|
||
+ {
|
||
+ unsigned HOST_WIDE_INT bits = TREE_INT_CST_LOW (size);
|
||
+ bits = (bits ? bits : 1);
|
||
+
|
||
+ /* An array with size greater than BIGGEST_ALIGNMENT is assigned
|
||
+ at least that alignment. In all other cases the minimum
|
||
+ alignment of the array is set to be the largest power of two
|
||
+ less than or equal to the total storage size of the array.
|
||
+ We assume that BIGGEST_ALIGNMENT fits in "unsigned int"; thus,
|
||
+ the shift below will not overflow. */
|
||
+ if (bits >= BIGGEST_ALIGNMENT)
|
||
+ min_alignment = BIGGEST_ALIGNMENT;
|
||
+ else
|
||
+ min_alignment = 1 << (floor_log2 (bits));
|
||
+ }
|
||
+
|
||
+ /* Having computed the minimum permissible alignment, enlarge it
|
||
+ if EXISTING_ALIGNMENT is greater. */
|
||
+ return MAX (min_alignment, existing_alignment);
|
||
+}
|
||
|
||
|
||
static unsigned int
|
||
--- a/gcc/function.h
|
||
+++ b/gcc/function.h
|
||
@@ -25,6 +25,7 @@
|
||
#include "tree.h"
|
||
#include "hashtab.h"
|
||
#include "varray.h"
|
||
+#include "hard-reg-set.h"
|
||
|
||
/* Stack of pending (incomplete) sequences saved by `start_sequence'.
|
||
Each element describes one pending sequence.
|
||
@@ -441,6 +442,8 @@
|
||
|
||
/* True if dbr_schedule has already been called for this function. */
|
||
bool dbr_scheduled_p;
|
||
+
|
||
+ HARD_REG_SET asm_clobbers;
|
||
};
|
||
|
||
#define return_label (crtl->x_return_label)
|
||
@@ -687,4 +690,7 @@
|
||
extern void used_types_insert (tree);
|
||
|
||
extern int get_next_funcdef_no (void);
|
||
+
|
||
+extern unsigned int alignment_for_aligned_arrays (tree, unsigned int);
|
||
+
|
||
#endif /* GCC_FUNCTION_H */
|
||
--- a/gcc/gcc.c
|
||
+++ b/gcc/gcc.c
|
||
@@ -651,8 +651,32 @@
|
||
|
||
/* config.h can define SWITCHES_NEED_SPACES to control which options
|
||
require spaces between the option and the argument. */
|
||
+/* GCC Bugzilla PR11810 indicates that GCC does not correctly handle
|
||
+ "-ofoo.o", in that it records "-ofoo.o" as a temporary file to
|
||
+ delete, rather than "foo.o".
|
||
+
|
||
+ Unfortunately, Eclipse's makefile generators use the "-ofoo.o"
|
||
+ form. See also CS Issue #3433. So, although most users probably
|
||
+ use "-o foo.o", the "-ofoo.o" form is used in practice.
|
||
+
|
||
+ See this email thread for additional information:
|
||
+
|
||
+ http://gcc.gnu.org/ml/gcc/2008-07/msg00395.html
|
||
+
|
||
+ Therefore, we define SWITCHES_NEED_SPACES to include "o" by
|
||
+ default. This causes "-ofoo.o" to be split into "-o foo.o" during
|
||
+ the initial processing of the command-line, before being seen by
|
||
+ the specs machinery.
|
||
+
|
||
+ A risk of this change is that tools which *require* the "-ofoo.o"
|
||
+ form will no longer work. However, we know of no such tools, and
|
||
+ they would not have worked with the "-o foo.o" form anyhow.
|
||
+
|
||
+ If this general strategy is acceptable upstream, the best approach
|
||
+ might be simply to eliminate this macro, since the only definitions
|
||
+ in target files are also to the value "o". */
|
||
#ifndef SWITCHES_NEED_SPACES
|
||
-#define SWITCHES_NEED_SPACES ""
|
||
+#define SWITCHES_NEED_SPACES "o"
|
||
#endif
|
||
|
||
/* config.h can define ENDFILE_SPEC to override the default crtn files. */
|
||
@@ -728,6 +752,8 @@
|
||
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
|
||
%(linker) %l " LINK_PIE_SPEC "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\
|
||
%{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\
|
||
+ %{Wno-poison-system-directories:--no-poison-system-directories}\
|
||
+ %{Werror=poison-system-directories:--error-poison-system-directories}\
|
||
%{static:} %{L*} %(mfwrap) %(link_libgcc) %o\
|
||
%{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)} %(mflib)\
|
||
%{fprofile-arcs|fprofile-generate|coverage:-lgcov}\
|
||
@@ -4616,27 +4642,53 @@

   if (argbuf_index > 0)
     {
-      int i, first;
+      int i, first, n;

       first = n_switches;
-      n_switches += argbuf_index;
-      switches = XRESIZEVEC (struct switchstr, switches, n_switches + 1);
+      n = n_switches + argbuf_index;
+      switches = XRESIZEVEC (struct switchstr, switches, n + 1);
+      switches[n] = switches[first];

       switches[n_switches] = switches[first];
       for (i = 0; i < argbuf_index; i++)
        {
          struct switchstr *sw;
+         const char *p = &argbuf[i][1];
+         int c = *p;

          /* Each switch should start with '-'. */
          if (argbuf[i][0] != '-')
            fatal ("switch '%s' does not start with '-'", argbuf[i]);

-         sw = &switches[i + first];
+         sw = &switches[n_switches];
          sw->part1 = &argbuf[i][1];
          sw->args = 0;
          sw->live_cond = 0;
          sw->validated = 0;
          sw->ordering = 0;
+
+         /* Deal with option arguments in separate argv elements. */
+         if ((SWITCH_TAKES_ARG (c) > (p[1] != 0))
+             || WORD_SWITCH_TAKES_ARG (p))
+           {
+             int j = 0;
+             int n_args = WORD_SWITCH_TAKES_ARG (p);
+
+             if (n_args == 0)
+               {
+                 /* Count only the option arguments in separate argv elements. */
+                 n_args = SWITCH_TAKES_ARG (c) - (p[1] != 0);
+               }
+             if (i + n_args >= argbuf_index)
+               fatal ("argument to '-%s' is missing", p);
+             switches[n_switches].args
+               = XNEWVEC (const char *, n_args + 1);
+             while (j < n_args)
+               switches[n_switches].args[j++] = argbuf[++i];
+             /* Null-terminate the vector. */
+             switches[n_switches].args[j] = 0;
+           }
+         n_switches++;
        }
     }
 }
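The SWITCHES_NEED_SPACES comment and the switch-processing hunk above together mean that a combined option such as "-ofoo.o" is seen by the later driver machinery as the two arguments "-o" and "foo.o". The following is only a rough, standalone illustration of that splitting behavior, not code from gcc.c; the helper name split_switch_if_needed and the fixed two-element output array are invented for the example.

/* Illustrative sketch only: split "-Xvalue" into "-X" + "value" when the
   option letter X appears in SWITCHES_NEED_SPACES ("o" by default).  */
#include <stdio.h>
#include <string.h>

#ifndef SWITCHES_NEED_SPACES
#define SWITCHES_NEED_SPACES "o"
#endif

static int
split_switch_if_needed (const char *arg, const char *out[2])
{
  /* Only "-Xvalue" forms whose letter is listed need splitting;
     a bare "-o" already has its argument in a separate element.  */
  if (arg[0] == '-' && arg[1] != '\0' && arg[2] != '\0'
      && strchr (SWITCHES_NEED_SPACES, arg[1]) != NULL)
    {
      static char opt[3];
      opt[0] = '-';
      opt[1] = arg[1];
      opt[2] = '\0';
      out[0] = opt;        /* "-o" */
      out[1] = arg + 2;    /* "foo.o" */
      return 2;
    }
  out[0] = arg;
  return 1;
}

int
main (void)
{
  const char *parts[2];
  int i, n = split_switch_if_needed ("-ofoo.o", parts);

  for (i = 0; i < n; i++)
    printf ("%s\n", parts[i]);   /* prints "-o" then "foo.o" */
  return 0;
}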
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -172,6 +172,7 @@
 #include "hashtab.h"
 #include "df.h"
 #include "dbgcnt.h"
+#include "target.h"

 /* Propagate flow information through back edges and thus enable PRE's
    moving loop invariant calculations out of loops.
@@ -1744,7 +1745,9 @@
             REG_EQUIV notes and if the argument slot is used somewhere
             explicitly, it means address of parameter has been taken,
             so we should not extend the lifetime of the pseudo. */
-         && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
+         && (note == NULL_RTX || ! MEM_P (XEXP (note, 0)))
+         && ! (targetm.cannot_copy_insn_p && INSN_P (insn)
+               && targetm.cannot_copy_insn_p (insn)))
        {
          /* An expression is not anticipatable if its operands are
             modified before this insn or if this is not the only SET in
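Note that the gcse.c hunk above calls targetm.cannot_copy_insn_p only after checking that the hook pointer is non-null. A minimal sketch of that optional-hook pattern follows; it uses invented names (example_targetm, copy_allowed_p, never_copy_odd) rather than GCC's real target vector, and is only meant to show the shape of the guarded call.

/* Sketch of the "optional hook" pattern: a null hook member means the
   target imposes no restriction, so callers test before calling.  */
#include <stdbool.h>
#include <stdio.h>

struct example_targetm
{
  /* Returns true if the insn must not be duplicated; may be left null.  */
  bool (*cannot_copy_insn_p) (int insn_uid);
};

static bool
never_copy_odd (int insn_uid)
{
  return (insn_uid & 1) != 0;
}

static bool
copy_allowed_p (const struct example_targetm *t, int insn_uid)
{
  /* Same shape as the patched condition: only invoke the hook when the
     target actually provides one.  */
  return !(t->cannot_copy_insn_p && t->cannot_copy_insn_p (insn_uid));
}

int
main (void)
{
  struct example_targetm with_hook = { never_copy_odd };
  struct example_targetm without_hook = { NULL };

  printf ("%d\n", copy_allowed_p (&with_hook, 3));    /* 0: hook forbids it */
  printf ("%d\n", copy_allowed_p (&without_hook, 3)); /* 1: no hook, allowed */
  return 0;
}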
--- a/gcc/genautomata.c
+++ b/gcc/genautomata.c
@@ -1,5 +1,5 @@
 /* Pipeline hazard description translator.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
    Free Software Foundation, Inc.

    Written by Vladimir Makarov <vmakarov@redhat.com>
@@ -22,21 +22,25 @@

 /* References:

-   1. Detecting pipeline structural hazards quickly. T. Proebsting,
+   1. The finite state automaton based pipeline hazard recognizer and
+      instruction scheduler in GCC. V. Makarov. Proceedings of GCC
+      summit, 2003.
+
+   2. Detecting pipeline structural hazards quickly. T. Proebsting,
       C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on
       Principles of Programming Languages, pages 280--286, 1994.

       This article is a good start point to understand usage of finite
       state automata for pipeline hazard recognizers. But I'd
-      recommend the 2nd article for more deep understanding.
+      recommend the 1st and 3rd article for more deep understanding.

-   2. Efficient Instruction Scheduling Using Finite State Automata:
+   3. Efficient Instruction Scheduling Using Finite State Automata:
       V. Bala and N. Rubin, Proceedings of MICRO-28. This is the best
       article about usage of finite state automata for pipeline hazard
       recognizers.

-   The current implementation is different from the 2nd article in the
-   following:
+   The current implementation is described in the 1st article and it
+   is different from the 3rd article in the following:

    1. New operator `|' (alternative) is permitted in functional unit
       reservation which can be treated deterministically and
@@ -463,7 +467,10 @@
      insn. */
   int insn_num;
   /* The following field value is list of bypasses in which given insn
-     is output insn. */
+     is output insn. Bypasses with the same input insn stay one after
+     another in the list in the same order as their occurrences in the
+     description but the bypass without a guard stays always the last
+     in a row of bypasses with the same input insn. */
   struct bypass_decl *bypass_list;

   /* The following fields are defined by automaton generator. */
@@ -2367,18 +2374,67 @@
 }


-/* The function searches for bypass with given IN_INSN_RESERV in given
-   BYPASS_LIST. */
-static struct bypass_decl *
-find_bypass (struct bypass_decl *bypass_list,
-            struct insn_reserv_decl *in_insn_reserv)
-{
-  struct bypass_decl *bypass;
-
-  for (bypass = bypass_list; bypass != NULL; bypass = bypass->next)
-    if (bypass->in_insn_reserv == in_insn_reserv)
-      break;
-  return bypass;
+/* The function inserts BYPASS in the list of bypasses of the
+   corresponding output insn.  The order of bypasses in the list is
+   decribed in a comment for member `bypass_list' (see above).  If
+   there is already the same bypass in the list the function reports
+   this and does nothing. */
+static void
+insert_bypass (struct bypass_decl *bypass)
+{
+  struct bypass_decl *curr, *last;
+  struct insn_reserv_decl *out_insn_reserv = bypass->out_insn_reserv;
+  struct insn_reserv_decl *in_insn_reserv = bypass->in_insn_reserv;
+
+  for (curr = out_insn_reserv->bypass_list, last = NULL;
+       curr != NULL;
+       last = curr, curr = curr->next)
+    if (curr->in_insn_reserv == in_insn_reserv)
+      {
+       if ((bypass->bypass_guard_name != NULL
+            && curr->bypass_guard_name != NULL
+            && ! strcmp (bypass->bypass_guard_name, curr->bypass_guard_name))
+           || bypass->bypass_guard_name == curr->bypass_guard_name)
+         {
+           if (bypass->bypass_guard_name == NULL)
+             {
+               if (!w_flag)
+                 error ("the same bypass `%s - %s' is already defined",
+                        bypass->out_insn_name, bypass->in_insn_name);
+               else
+                 warning (0, "the same bypass `%s - %s' is already defined",
+                          bypass->out_insn_name, bypass->in_insn_name);
+             }
+           else if (!w_flag)
+             error ("the same bypass `%s - %s' (guard %s) is already defined",
+                    bypass->out_insn_name, bypass->in_insn_name,
+                    bypass->bypass_guard_name);
+           else
+             warning
+               (0, "the same bypass `%s - %s' (guard %s) is already defined",
+                bypass->out_insn_name, bypass->in_insn_name,
+                bypass->bypass_guard_name);
+           return;
+         }
+       if (curr->bypass_guard_name == NULL)
+         break;
+       if (curr->next == NULL || curr->next->in_insn_reserv != in_insn_reserv)
+         {
+           last = curr;
+           break;
+         }
+
+      }
+  if (last == NULL)
+    {
+      bypass->next = out_insn_reserv->bypass_list;
+      out_insn_reserv->bypass_list = bypass;
+    }
+  else
+    {
+      bypass->next = last->next;
+      last->next = bypass;
+    }
 }

 /* The function processes pipeline description declarations, checks
@@ -2391,7 +2447,6 @@
   decl_t decl_in_table;
   decl_t out_insn_reserv;
   decl_t in_insn_reserv;
-  struct bypass_decl *bypass;
   int automaton_presence;
   int i;

@@ -2514,36 +2569,7 @@
                = DECL_INSN_RESERV (out_insn_reserv);
              DECL_BYPASS (decl)->in_insn_reserv
                = DECL_INSN_RESERV (in_insn_reserv);
-             bypass
-               = find_bypass (DECL_INSN_RESERV (out_insn_reserv)->bypass_list,
-                              DECL_BYPASS (decl)->in_insn_reserv);
-             if (bypass != NULL)
-               {
-                 if (DECL_BYPASS (decl)->latency == bypass->latency)
-                   {
-                     if (!w_flag)
-                       error
-                         ("the same bypass `%s - %s' is already defined",
-                          DECL_BYPASS (decl)->out_insn_name,
-                          DECL_BYPASS (decl)->in_insn_name);
-                     else
-                       warning
-                         (0, "the same bypass `%s - %s' is already defined",
-                          DECL_BYPASS (decl)->out_insn_name,
-                          DECL_BYPASS (decl)->in_insn_name);
-                   }
-                 else
-                   error ("bypass `%s - %s' is already defined",
-                          DECL_BYPASS (decl)->out_insn_name,
-                          DECL_BYPASS (decl)->in_insn_name);
-               }
-             else
-               {
-                 DECL_BYPASS (decl)->next
-                   = DECL_INSN_RESERV (out_insn_reserv)->bypass_list;
-                 DECL_INSN_RESERV (out_insn_reserv)->bypass_list
-                   = DECL_BYPASS (decl);
-               }
+             insert_bypass (DECL_BYPASS (decl));
            }
        }
     }
@@ -8159,19 +8185,32 @@
                     (advance_cycle_insn_decl)->insn_num));
          fprintf (output_file, " case %d:\n",
                   bypass->in_insn_reserv->insn_num);
-         if (bypass->bypass_guard_name == NULL)
-           fprintf (output_file, " return %d;\n",
-                    bypass->latency);
-         else
+         for (;;)
            {
-             fprintf (output_file,
-                      " if (%s (%s, %s))\n",
-                      bypass->bypass_guard_name, INSN_PARAMETER_NAME,
-                      INSN2_PARAMETER_NAME);
-             fprintf (output_file,
-                      " return %d;\n break;\n",
-                      bypass->latency);
+             if (bypass->bypass_guard_name == NULL)
+               {
+                 gcc_assert (bypass->next == NULL
+                             || (bypass->in_insn_reserv
+                                 != bypass->next->in_insn_reserv));
+                 fprintf (output_file, " return %d;\n",
+                          bypass->latency);
+               }
+             else
+               {
+                 fprintf (output_file,
+                          " if (%s (%s, %s))\n",
+                          bypass->bypass_guard_name, INSN_PARAMETER_NAME,
+                          INSN2_PARAMETER_NAME);
+                 fprintf (output_file, " return %d;\n",
+                          bypass->latency);
+               }
+             if (bypass->next == NULL
+                 || bypass->in_insn_reserv != bypass->next->in_insn_reserv)
+               break;
+             bypass = bypass->next;
            }
+         if (bypass->bypass_guard_name != NULL)
+           fprintf (output_file, " break;\n");
        }
      fputs (" }\n break;\n", output_file);
    }
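The ordering that insert_bypass maintains (bypasses with the same input insn kept adjacent, and the unguarded bypass last in its run) is what lets the generator loop above emit every guarded check for a case and fall through to a single unguarded return at the end of the run. The sketch below illustrates that insertion discipline only; it is not genautomata.c, and the type and function names (entry, insert_keeping_runs) are invented for the example.

/* Entries with the same key stay contiguous, and the entry without a
   guard stays last in its run, mirroring the `bypass_list' invariant.  */
#include <stdio.h>
#include <stddef.h>

struct entry
{
  int key;               /* plays the role of the bypass's input insn */
  const char *guard;     /* NULL means "unguarded" (the default case) */
  struct entry *next;
};

static struct entry *
insert_keeping_runs (struct entry *head, struct entry *e)
{
  struct entry *curr, *last = NULL;

  for (curr = head; curr != NULL; last = curr, curr = curr->next)
    if (curr->key == e->key)
      {
        if (curr->guard == NULL)
          {
            /* Like the patch, refuse a second unguarded entry for the
               same key; otherwise insert the new entry before the
               unguarded one so the unguarded entry stays last.  */
            if (e->guard == NULL)
              return head;
            break;
          }
        if (curr->next == NULL || curr->next->key != e->key)
          {
            last = curr;        /* append at the end of this run */
            break;
          }
      }

  if (last == NULL)
    {
      e->next = head;           /* empty list, or run starts at the head */
      return e;
    }
  e->next = last->next;
  last->next = e;
  return head;
}

int
main (void)
{
  struct entry a = { 1, "guard_a", NULL };
  struct entry b = { 1, NULL, NULL };       /* unguarded: must end the run */
  struct entry c = { 1, "guard_c", NULL };
  struct entry *head = NULL, *p;

  head = insert_keeping_runs (head, &a);
  head = insert_keeping_runs (head, &b);
  head = insert_keeping_runs (head, &c);

  for (p = head; p != NULL; p = p->next)
    printf ("%d %s\n", p->key, p->guard ? p->guard : "(none)");
  /* Prints guard_a, then guard_c, then the unguarded entry last.  */
  return 0;
}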
--- a/gcc/gengtype-lex.c
|
||
+++ /dev/null
|
||
@@ -1,2638 +0,0 @@
|
||
-#line 2 "gengtype-lex.c"
|
||
-
|
||
-#line 4 "gengtype-lex.c"
|
||
-
|
||
-#define YY_INT_ALIGNED short int
|
||
-
|
||
-/* A lexical scanner generated by flex */
|
||
-
|
||
-#define FLEX_SCANNER
|
||
-#define YY_FLEX_MAJOR_VERSION 2
|
||
-#define YY_FLEX_MINOR_VERSION 5
|
||
-#define YY_FLEX_SUBMINOR_VERSION 35
|
||
-#if YY_FLEX_SUBMINOR_VERSION > 0
|
||
-#define FLEX_BETA
|
||
-#endif
|
||
-
|
||
-/* First, we deal with platform-specific or compiler-specific issues. */
|
||
-
|
||
-/* begin standard C headers. */
|
||
-#include <stdio.h>
|
||
-#include <string.h>
|
||
-#include <errno.h>
|
||
-#include <stdlib.h>
|
||
-
|
||
-/* end standard C headers. */
|
||
-
|
||
-/* flex integer type definitions */
|
||
-
|
||
-#ifndef FLEXINT_H
|
||
-#define FLEXINT_H
|
||
-
|
||
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
|
||
-
|
||
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
||
-
|
||
-/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
|
||
- * if you want the limit (max/min) macros for int types.
|
||
- */
|
||
-#ifndef __STDC_LIMIT_MACROS
|
||
-#define __STDC_LIMIT_MACROS 1
|
||
-#endif
|
||
-
|
||
-#include <inttypes.h>
|
||
-typedef int8_t flex_int8_t;
|
||
-typedef uint8_t flex_uint8_t;
|
||
-typedef int16_t flex_int16_t;
|
||
-typedef uint16_t flex_uint16_t;
|
||
-typedef int32_t flex_int32_t;
|
||
-typedef uint32_t flex_uint32_t;
|
||
-#else
|
||
-typedef signed char flex_int8_t;
|
||
-typedef short int flex_int16_t;
|
||
-typedef int flex_int32_t;
|
||
-typedef unsigned char flex_uint8_t;
|
||
-typedef unsigned short int flex_uint16_t;
|
||
-typedef unsigned int flex_uint32_t;
|
||
-#endif /* ! C99 */
|
||
-
|
||
-/* Limits of integral types. */
|
||
-#ifndef INT8_MIN
|
||
-#define INT8_MIN (-128)
|
||
-#endif
|
||
-#ifndef INT16_MIN
|
||
-#define INT16_MIN (-32767-1)
|
||
-#endif
|
||
-#ifndef INT32_MIN
|
||
-#define INT32_MIN (-2147483647-1)
|
||
-#endif
|
||
-#ifndef INT8_MAX
|
||
-#define INT8_MAX (127)
|
||
-#endif
|
||
-#ifndef INT16_MAX
|
||
-#define INT16_MAX (32767)
|
||
-#endif
|
||
-#ifndef INT32_MAX
|
||
-#define INT32_MAX (2147483647)
|
||
-#endif
|
||
-#ifndef UINT8_MAX
|
||
-#define UINT8_MAX (255U)
|
||
-#endif
|
||
-#ifndef UINT16_MAX
|
||
-#define UINT16_MAX (65535U)
|
||
-#endif
|
||
-#ifndef UINT32_MAX
|
||
-#define UINT32_MAX (4294967295U)
|
||
-#endif
|
||
-
|
||
-#endif /* ! FLEXINT_H */
|
||
-
|
||
-#ifdef __cplusplus
|
||
-
|
||
-/* The "const" storage-class-modifier is valid. */
|
||
-#define YY_USE_CONST
|
||
-
|
||
-#else /* ! __cplusplus */
|
||
-
|
||
-/* C99 requires __STDC__ to be defined as 1. */
|
||
-#if defined (__STDC__)
|
||
-
|
||
-#define YY_USE_CONST
|
||
-
|
||
-#endif /* defined (__STDC__) */
|
||
-#endif /* ! __cplusplus */
|
||
-
|
||
-#ifdef YY_USE_CONST
|
||
-#define yyconst const
|
||
-#else
|
||
-#define yyconst
|
||
-#endif
|
||
-
|
||
-/* Returned upon end-of-file. */
|
||
-#define YY_NULL 0
|
||
-
|
||
-/* Promotes a possibly negative, possibly signed char to an unsigned
|
||
- * integer for use as an array index. If the signed char is negative,
|
||
- * we want to instead treat it as an 8-bit unsigned char, hence the
|
||
- * double cast.
|
||
- */
|
||
-#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
|
||
-
|
||
-/* Enter a start condition. This macro really ought to take a parameter,
|
||
- * but we do it the disgusting crufty way forced on us by the ()-less
|
||
- * definition of BEGIN.
|
||
- */
|
||
-#define BEGIN (yy_start) = 1 + 2 *
|
||
-
|
||
-/* Translate the current start state into a value that can be later handed
|
||
- * to BEGIN to return to the state. The YYSTATE alias is for lex
|
||
- * compatibility.
|
||
- */
|
||
-#define YY_START (((yy_start) - 1) / 2)
|
||
-#define YYSTATE YY_START
|
||
-
|
||
-/* Action number for EOF rule of a given start state. */
|
||
-#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
|
||
-
|
||
-/* Special action meaning "start processing a new file". */
|
||
-#define YY_NEW_FILE yyrestart(yyin )
|
||
-
|
||
-#define YY_END_OF_BUFFER_CHAR 0
|
||
-
|
||
-/* Size of default input buffer. */
|
||
-#ifndef YY_BUF_SIZE
|
||
-#define YY_BUF_SIZE 16384
|
||
-#endif
|
||
-
|
||
-/* The state buf must be large enough to hold one state per character in the main buffer.
|
||
- */
|
||
-#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
|
||
-
|
||
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
|
||
-#define YY_TYPEDEF_YY_BUFFER_STATE
|
||
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
|
||
-#endif
|
||
-
|
||
-extern int yyleng;
|
||
-
|
||
-extern FILE *yyin, *yyout;
|
||
-
|
||
-#define EOB_ACT_CONTINUE_SCAN 0
|
||
-#define EOB_ACT_END_OF_FILE 1
|
||
-#define EOB_ACT_LAST_MATCH 2
|
||
-
|
||
- #define YY_LESS_LINENO(n)
|
||
-
|
||
-/* Return all but the first "n" matched characters back to the input stream. */
|
||
-#define yyless(n) \
|
||
- do \
|
||
- { \
|
||
- /* Undo effects of setting up yytext. */ \
|
||
- int yyless_macro_arg = (n); \
|
||
- YY_LESS_LINENO(yyless_macro_arg);\
|
||
- *yy_cp = (yy_hold_char); \
|
||
- YY_RESTORE_YY_MORE_OFFSET \
|
||
- (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
|
||
- YY_DO_BEFORE_ACTION; /* set up yytext again */ \
|
||
- } \
|
||
- while ( 0 )
|
||
-
|
||
-#define unput(c) yyunput( c, (yytext_ptr) )
|
||
-
|
||
-#ifndef YY_TYPEDEF_YY_SIZE_T
|
||
-#define YY_TYPEDEF_YY_SIZE_T
|
||
-typedef size_t yy_size_t;
|
||
-#endif
|
||
-
|
||
-#ifndef YY_STRUCT_YY_BUFFER_STATE
|
||
-#define YY_STRUCT_YY_BUFFER_STATE
|
||
-struct yy_buffer_state
|
||
- {
|
||
- FILE *yy_input_file;
|
||
-
|
||
- char *yy_ch_buf; /* input buffer */
|
||
- char *yy_buf_pos; /* current position in input buffer */
|
||
-
|
||
- /* Size of input buffer in bytes, not including room for EOB
|
||
- * characters.
|
||
- */
|
||
- yy_size_t yy_buf_size;
|
||
-
|
||
- /* Number of characters read into yy_ch_buf, not including EOB
|
||
- * characters.
|
||
- */
|
||
- int yy_n_chars;
|
||
-
|
||
- /* Whether we "own" the buffer - i.e., we know we created it,
|
||
- * and can realloc() it to grow it, and should free() it to
|
||
- * delete it.
|
||
- */
|
||
- int yy_is_our_buffer;
|
||
-
|
||
- /* Whether this is an "interactive" input source; if so, and
|
||
- * if we're using stdio for input, then we want to use getc()
|
||
- * instead of fread(), to make sure we stop fetching input after
|
||
- * each newline.
|
||
- */
|
||
- int yy_is_interactive;
|
||
-
|
||
- /* Whether we're considered to be at the beginning of a line.
|
||
- * If so, '^' rules will be active on the next match, otherwise
|
||
- * not.
|
||
- */
|
||
- int yy_at_bol;
|
||
-
|
||
- int yy_bs_lineno; /**< The line count. */
|
||
- int yy_bs_column; /**< The column count. */
|
||
-
|
||
- /* Whether to try to fill the input buffer when we reach the
|
||
- * end of it.
|
||
- */
|
||
- int yy_fill_buffer;
|
||
-
|
||
- int yy_buffer_status;
|
||
-
|
||
-#define YY_BUFFER_NEW 0
|
||
-#define YY_BUFFER_NORMAL 1
|
||
- /* When an EOF's been seen but there's still some text to process
|
||
- * then we mark the buffer as YY_EOF_PENDING, to indicate that we
|
||
- * shouldn't try reading from the input source any more. We might
|
||
- * still have a bunch of tokens to match, though, because of
|
||
- * possible backing-up.
|
||
- *
|
||
- * When we actually see the EOF, we change the status to "new"
|
||
- * (via yyrestart()), so that the user can continue scanning by
|
||
- * just pointing yyin at a new input file.
|
||
- */
|
||
-#define YY_BUFFER_EOF_PENDING 2
|
||
-
|
||
- };
|
||
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
|
||
-
|
||
-/* Stack of input buffers. */
|
||
-static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
|
||
-static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
|
||
-static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
|
||
-
|
||
-/* We provide macros for accessing buffer states in case in the
|
||
- * future we want to put the buffer states in a more general
|
||
- * "scanner state".
|
||
- *
|
||
- * Returns the top of the stack, or NULL.
|
||
- */
|
||
-#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
|
||
- ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
|
||
- : NULL)
|
||
-
|
||
-/* Same as previous macro, but useful when we know that the buffer stack is not
|
||
- * NULL or when we need an lvalue. For internal use only.
|
||
- */
|
||
-#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
|
||
-
|
||
-/* yy_hold_char holds the character lost when yytext is formed. */
|
||
-static char yy_hold_char;
|
||
-static int yy_n_chars; /* number of characters read into yy_ch_buf */
|
||
-int yyleng;
|
||
-
|
||
-/* Points to current character in buffer. */
|
||
-static char *yy_c_buf_p = (char *) 0;
|
||
-static int yy_init = 0; /* whether we need to initialize */
|
||
-static int yy_start = 0; /* start state number */
|
||
-
|
||
-/* Flag which is used to allow yywrap()'s to do buffer switches
|
||
- * instead of setting up a fresh yyin. A bit of a hack ...
|
||
- */
|
||
-static int yy_did_buffer_switch_on_eof;
|
||
-
|
||
-void yyrestart (FILE *input_file );
|
||
-void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
|
||
-YY_BUFFER_STATE yy_create_buffer (FILE *file,int size );
|
||
-void yy_delete_buffer (YY_BUFFER_STATE b );
|
||
-void yy_flush_buffer (YY_BUFFER_STATE b );
|
||
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer );
|
||
-void yypop_buffer_state (void );
|
||
-
|
||
-static void yyensure_buffer_stack (void );
|
||
-static void yy_load_buffer_state (void );
|
||
-static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file );
|
||
-
|
||
-#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER )
|
||
-
|
||
-YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size );
|
||
-YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str );
|
||
-YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len );
|
||
-
|
||
-void *yyalloc (yy_size_t );
|
||
-void *yyrealloc (void *,yy_size_t );
|
||
-void yyfree (void * );
|
||
-
|
||
-#define yy_new_buffer yy_create_buffer
|
||
-
|
||
-#define yy_set_interactive(is_interactive) \
|
||
- { \
|
||
- if ( ! YY_CURRENT_BUFFER ){ \
|
||
- yyensure_buffer_stack (); \
|
||
- YY_CURRENT_BUFFER_LVALUE = \
|
||
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
|
||
- } \
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
|
||
- }
|
||
-
|
||
-#define yy_set_bol(at_bol) \
|
||
- { \
|
||
- if ( ! YY_CURRENT_BUFFER ){\
|
||
- yyensure_buffer_stack (); \
|
||
- YY_CURRENT_BUFFER_LVALUE = \
|
||
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
|
||
- } \
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
|
||
- }
|
||
-
|
||
-#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
|
||
-
|
||
-/* Begin user sect3 */
|
||
-
|
||
-#define yywrap(n) 1
|
||
-#define YY_SKIP_YYWRAP
|
||
-
|
||
-typedef unsigned char YY_CHAR;
|
||
-
|
||
-FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
|
||
-
|
||
-typedef int yy_state_type;
|
||
-
|
||
-extern int yylineno;
|
||
-
|
||
-int yylineno = 1;
|
||
-
|
||
-extern char *yytext;
|
||
-#define yytext_ptr yytext
|
||
-
|
||
-static yy_state_type yy_get_previous_state (void );
|
||
-static yy_state_type yy_try_NUL_trans (yy_state_type current_state );
|
||
-static int yy_get_next_buffer (void );
|
||
-static void yy_fatal_error (yyconst char msg[] );
|
||
-
|
||
-/* Done after the current pattern has been matched and before the
|
||
- * corresponding action - sets up yytext.
|
||
- */
|
||
-#define YY_DO_BEFORE_ACTION \
|
||
- (yytext_ptr) = yy_bp; \
|
||
- yyleng = (size_t) (yy_cp - yy_bp); \
|
||
- (yy_hold_char) = *yy_cp; \
|
||
- *yy_cp = '\0'; \
|
||
- (yy_c_buf_p) = yy_cp;
|
||
-
|
||
-#define YY_NUM_RULES 49
|
||
-#define YY_END_OF_BUFFER 50
|
||
-/* This struct is not used in this scanner,
|
||
- but its presence is necessary. */
|
||
-struct yy_trans_info
|
||
- {
|
||
- flex_int32_t yy_verify;
|
||
- flex_int32_t yy_nxt;
|
||
- };
|
||
-static yyconst flex_int16_t yy_accept[445] =
|
||
- { 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 50, 36,
|
||
- 36, 33, 45, 36, 45, 34, 36, 36, 34, 34,
|
||
- 34, 34, 34, 31, 10, 10, 31, 29, 31, 31,
|
||
- 31, 20, 31, 31, 31, 31, 31, 31, 31, 31,
|
||
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
|
||
- 31, 10, 31, 41, 39, 46, 46, 0, 0, 0,
|
||
- 37, 0, 0, 0, 38, 32, 34, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 34, 34, 34, 34,
|
||
- 34, 10, 0, 25, 0, 0, 0, 0, 9, 20,
|
||
- 24, 0, 0, 0, 0, 0, 0, 0, 0, 26,
|
||
-
|
||
- 11, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 10, 0, 0, 0, 0,
|
||
- 42, 44, 43, 0, 35, 0, 0, 0, 0, 0,
|
||
- 0, 34, 34, 34, 34, 34, 34, 27, 28, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 30, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 34, 34, 34, 34, 34, 34,
|
||
- 0, 0, 0, 13, 0, 14, 0, 0, 0, 0,
|
||
- 22, 22, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
-
|
||
- 0, 0, 0, 48, 0, 0, 0, 0, 0, 0,
|
||
- 0, 34, 34, 34, 34, 34, 34, 0, 0, 0,
|
||
- 0, 0, 17, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 34,
|
||
- 34, 34, 34, 34, 3, 0, 0, 0, 0, 12,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 15, 0, 0, 0,
|
||
- 0, 0, 0, 0, 34, 4, 5, 2, 34, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
-
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 16,
|
||
- 0, 0, 0, 0, 34, 1, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 22, 22, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 34, 34, 34,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 21, 0, 0, 0, 0, 0, 0, 34, 7, 6,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 18,
|
||
- 0, 0, 0, 34, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 19, 0, 0, 47, 34, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 34, 0, 0,
|
||
-
|
||
- 0, 0, 0, 0, 0, 0, 34, 0, 24, 24,
|
||
- 0, 0, 0, 0, 0, 0, 0, 34, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,
|
||
- 0, 23, 0, 0, 0, 0, 0, 40, 0, 0,
|
||
- 0, 0, 0, 0
|
||
- } ;
|
||
-
|
||
-static yyconst flex_int32_t yy_ec[256] =
|
||
- { 0,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
|
||
- 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 2, 1, 4, 5, 1, 6, 1, 7, 8,
|
||
- 9, 10, 1, 6, 6, 11, 12, 13, 13, 13,
|
||
- 13, 13, 13, 13, 13, 13, 13, 6, 6, 6,
|
||
- 6, 6, 1, 1, 14, 15, 16, 17, 18, 19,
|
||
- 20, 21, 22, 23, 23, 24, 25, 26, 27, 28,
|
||
- 23, 29, 30, 31, 32, 33, 34, 23, 35, 23,
|
||
- 36, 37, 38, 1, 39, 1, 40, 41, 42, 43,
|
||
-
|
||
- 44, 45, 46, 47, 48, 49, 49, 50, 51, 52,
|
||
- 53, 54, 49, 55, 56, 57, 58, 59, 49, 60,
|
||
- 61, 62, 6, 6, 6, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
-
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 1, 1
|
||
- } ;
|
||
-
|
||
-static yyconst flex_int32_t yy_meta[63] =
|
||
- { 0,
|
||
- 1, 2, 3, 1, 1, 1, 1, 1, 4, 5,
|
||
- 1, 1, 6, 7, 7, 7, 7, 7, 7, 7,
|
||
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||
- 7, 7, 7, 7, 7, 8, 1, 1, 9, 9,
|
||
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||
- 9, 9
|
||
- } ;
|
||
-
|
||
-static yyconst flex_int16_t yy_base[483] =
|
||
- { 0,
|
||
- 0, 38, 96, 12, 12, 13, 15, 16, 1028, 1444,
|
||
- 32, 51, 20, 990, 1016, 0, 157, 18, 1007, 964,
|
||
- 966, 961, 969, 1444, 25, 27, 27, 1444, 983, 1008,
|
||
- 1008, 1004, 215, 253, 5, 32, 29, 974, 45, 962,
|
||
- 996, 35, 38, 39, 40, 41, 134, 42, 136, 137,
|
||
- 138, 75, 996, 0, 1444, 985, 984, 166, 964, 162,
|
||
- 1444, 0, 987, 990, 1444, 1444, 0, 186, 165, 974,
|
||
- 931, 933, 928, 936, 168, 943, 967, 928, 140, 930,
|
||
- 935, 87, 167, 1444, 979, 974, 977, 968, 1444, 950,
|
||
- 1444, 935, 934, 145, 52, 46, 148, 165, 922, 1444,
|
||
-
|
||
- 1444, 152, 156, 155, 170, 173, 175, 182, 183, 185,
|
||
- 211, 214, 222, 218, 221, 269, 957, 956, 291, 0,
|
||
- 1444, 1444, 1444, 922, 1444, 937, 898, 195, 900, 905,
|
||
- 907, 912, 906, 892, 890, 903, 893, 1444, 1444, 209,
|
||
- 254, 251, 353, 248, 391, 354, 350, 351, 340, 355,
|
||
- 341, 429, 339, 356, 344, 347, 360, 390, 43, 361,
|
||
- 391, 395, 429, 1444, 0, 0, 280, 906, 900, 886,
|
||
- 884, 897, 872, 876, 890, 867, 873, 878, 876, 866,
|
||
- 381, 348, 382, 1444, 384, 1444, 389, 397, 491, 398,
|
||
- 1444, 528, 418, 399, 420, 477, 478, 422, 421, 480,
|
||
-
|
||
- 479, 0, 449, 1444, 884, 861, 867, 872, 870, 860,
|
||
- 859, 892, 857, 866, 850, 862, 586, 493, 496, 494,
|
||
- 484, 624, 1444, 0, 878, 876, 876, 834, 839, 841,
|
||
- 832, 830, 199, 830, 490, 499, 486, 492, 488, 489,
|
||
- 662, 0, 863, 828, 837, 821, 833, 0, 832, 859,
|
||
- 700, 738, 776, 829, 1444, 431, 258, 437, 515, 1444,
|
||
- 846, 844, 841, 817, 829, 809, 319, 815, 813, 478,
|
||
- 809, 512, 528, 520, 525, 814, 1444, 0, 833, 0,
|
||
- 0, 0, 803, 551, 808, 1444, 1444, 1444, 852, 383,
|
||
- 521, 530, 539, 822, 829, 813, 793, 787, 802, 801,
|
||
-
|
||
- 556, 793, 783, 785, 792, 787, 523, 545, 535, 1444,
|
||
- 0, 795, 0, 561, 585, 1444, 555, 343, 581, 584,
|
||
- 794, 811, 792, 773, 772, 1444, 0, 771, 783, 772,
|
||
- 764, 552, 890, 558, 0, 623, 778, 784, 928, 966,
|
||
- 583, 593, 594, 613, 792, 792, 771, 761, 746, 591,
|
||
- 1444, 1004, 0, 778, 0, 0, 766, 776, 1444, 1444,
|
||
- 620, 621, 626, 627, 653, 777, 769, 775, 1042, 1444,
|
||
- 0, 772, 787, 767, 556, 577, 615, 649, 629, 762,
|
||
- 753, 774, 1444, 0, 763, 1444, 773, 632, 659, 662,
|
||
- 656, 654, 754, 742, 753, 0, 754, 729, 665, 688,
|
||
-
|
||
- 667, 744, 742, 683, 0, 695, 692, 689, 715, 722,
|
||
- 699, 711, 701, 666, 673, 0, 705, 1080, 704, 749,
|
||
- 751, 753, 756, 663, 658, 618, 593, 0, 0, 1444,
|
||
- 758, 1444, 760, 600, 588, 543, 483, 1444, 439, 386,
|
||
- 247, 206, 167, 1444, 1118, 1127, 1136, 1145, 1154, 1158,
|
||
- 1167, 1176, 1185, 1194, 1202, 1211, 1220, 1229, 1238, 1247,
|
||
- 1256, 1265, 1273, 1282, 1290, 1298, 1306, 1314, 1323, 1331,
|
||
- 1340, 1349, 1357, 1365, 1374, 1383, 1392, 1400, 1409, 1417,
|
||
- 1426, 1435
|
||
- } ;
|
||
-
|
||
-static yyconst flex_int16_t yy_def[483] =
|
||
- { 0,
|
||
- 445, 445, 444, 3, 446, 446, 446, 446, 444, 444,
|
||
- 444, 444, 447, 448, 449, 450, 444, 444, 450, 450,
|
||
- 450, 450, 450, 444, 444, 444, 451, 444, 452, 444,
|
||
- 444, 444, 453, 453, 34, 34, 34, 34, 34, 454,
|
||
- 444, 34, 34, 34, 34, 34, 34, 34, 34, 34,
|
||
- 34, 444, 455, 456, 444, 457, 457, 444, 444, 447,
|
||
- 444, 447, 444, 448, 444, 444, 450, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 450, 450, 450, 450,
|
||
- 450, 444, 451, 444, 451, 444, 452, 444, 444, 444,
|
||
- 444, 34, 34, 34, 34, 34, 34, 34, 454, 444,
|
||
-
|
||
- 444, 34, 34, 34, 34, 34, 34, 34, 34, 34,
|
||
- 34, 34, 34, 34, 34, 444, 455, 455, 444, 458,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 450, 450, 450, 450, 450, 450, 444, 444, 34,
|
||
- 34, 34, 453, 34, 453, 34, 34, 34, 34, 34,
|
||
- 34, 453, 34, 34, 34, 34, 34, 34, 34, 34,
|
||
- 34, 34, 119, 444, 119, 459, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 450, 450, 450, 450, 450, 450,
|
||
- 34, 34, 34, 444, 34, 444, 34, 34, 453, 34,
|
||
- 444, 444, 34, 34, 34, 34, 34, 34, 34, 34,
|
||
-
|
||
- 34, 460, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 450, 450, 450, 450, 450, 450, 34, 34, 34,
|
||
- 34, 453, 444, 192, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 34, 34, 34, 34, 34, 34,
|
||
- 453, 461, 444, 444, 444, 444, 444, 462, 444, 450,
|
||
- 450, 450, 450, 450, 444, 34, 34, 34, 34, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 34, 34, 34, 34, 453, 444, 463, 444, 464,
|
||
- 465, 466, 444, 444, 450, 444, 444, 444, 450, 34,
|
||
- 34, 34, 34, 444, 444, 444, 444, 444, 444, 444,
|
||
-
|
||
- 467, 444, 444, 444, 444, 444, 34, 34, 34, 444,
|
||
- 468, 444, 469, 444, 450, 444, 34, 34, 34, 34,
|
||
- 444, 444, 444, 444, 444, 444, 192, 444, 444, 444,
|
||
- 444, 34, 453, 34, 470, 444, 444, 450, 450, 450,
|
||
- 34, 34, 34, 34, 444, 444, 444, 444, 444, 34,
|
||
- 444, 453, 471, 444, 472, 473, 444, 450, 444, 444,
|
||
- 34, 34, 34, 34, 34, 444, 444, 444, 453, 444,
|
||
- 474, 444, 444, 450, 34, 34, 34, 34, 34, 444,
|
||
- 444, 444, 444, 475, 444, 444, 450, 34, 34, 34,
|
||
- 34, 34, 444, 444, 444, 476, 444, 450, 34, 34,
|
||
-
|
||
- 34, 444, 444, 444, 477, 444, 450, 34, 444, 478,
|
||
- 34, 444, 444, 444, 444, 479, 444, 450, 34, 444,
|
||
- 478, 478, 480, 444, 444, 444, 444, 481, 482, 444,
|
||
- 444, 444, 480, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 0, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444
|
||
- } ;
|
||
-
|
||
-static yyconst flex_int16_t yy_nxt[1507] =
|
||
- { 0,
|
||
- 10, 11, 12, 13, 10, 10, 14, 10, 10, 10,
|
||
- 10, 15, 10, 52, 55, 55, 53, 55, 55, 75,
|
||
- 444, 56, 56, 61, 57, 57, 82, 82, 82, 82,
|
||
- 84, 92, 94, 58, 58, 10, 10, 10, 10, 17,
|
||
- 12, 13, 18, 10, 14, 10, 10, 10, 10, 15,
|
||
- 10, 59, 58, 58, 19, 92, 62, 95, 92, 96,
|
||
- 76, 92, 98, 85, 92, 92, 92, 92, 92, 92,
|
||
- 59, 92, 92, 10, 10, 10, 116, 82, 92, 117,
|
||
- 143, 20, 105, 142, 103, 109, 198, 102, 82, 82,
|
||
- 104, 106, 107, 21, 22, 23, 24, 25, 26, 27,
|
||
-
|
||
- 24, 28, 29, 28, 28, 28, 30, 31, 32, 33,
|
||
- 34, 35, 33, 36, 33, 37, 38, 33, 33, 33,
|
||
- 33, 33, 33, 33, 33, 33, 33, 33, 39, 33,
|
||
- 33, 40, 41, 24, 33, 33, 42, 43, 44, 45,
|
||
- 33, 33, 33, 46, 33, 47, 33, 48, 33, 49,
|
||
- 33, 50, 33, 51, 33, 33, 33, 33, 68, 58,
|
||
- 92, 69, 92, 92, 92, 61, 75, 58, 58, 75,
|
||
- 84, 92, 141, 70, 92, 110, 59, 144, 92, 134,
|
||
- 145, 92, 92, 112, 113, 59, 108, 68, 58, 115,
|
||
- 69, 92, 111, 114, 135, 147, 92, 301, 62, 92,
|
||
-
|
||
- 71, 92, 70, 85, 146, 59, 148, 76, 92, 92,
|
||
- 76, 92, 72, 73, 74, 91, 91, 91, 91, 91,
|
||
- 91, 91, 91, 91, 91, 91, 91, 151, 149, 71,
|
||
- 150, 152, 181, 153, 170, 92, 301, 92, 154, 155,
|
||
- 92, 72, 73, 74, 92, 269, 270, 92, 92, 171,
|
||
- 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
|
||
- 91, 91, 91, 91, 91, 156, 157, 158, 161, 182,
|
||
- 116, 82, 160, 117, 92, 183, 162, 92, 185, 93,
|
||
- 92, 203, 203, 159, 92, 443, 291, 204, 91, 91,
|
||
- 91, 163, 163, 164, 163, 163, 163, 163, 163, 163,
|
||
-
|
||
- 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||
- 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||
- 163, 163, 163, 163, 163, 163, 163, 163, 163, 165,
|
||
- 165, 165, 165, 165, 165, 165, 165, 165, 165, 165,
|
||
- 165, 165, 165, 165, 165, 165, 165, 165, 165, 165,
|
||
- 165, 165, 165, 184, 184, 184, 184, 184, 184, 184,
|
||
- 184, 184, 184, 184, 184, 92, 92, 92, 219, 92,
|
||
- 92, 300, 342, 92, 92, 301, 92, 92, 188, 190,
|
||
- 92, 92, 92, 194, 152, 195, 92, 92, 184, 184,
|
||
- 184, 186, 186, 186, 186, 186, 186, 186, 186, 186,
|
||
-
|
||
- 186, 186, 186, 152, 152, 189, 187, 92, 92, 92,
|
||
- 92, 442, 193, 317, 196, 92, 92, 92, 199, 218,
|
||
- 220, 92, 221, 92, 92, 92, 186, 186, 186, 191,
|
||
- 192, 192, 191, 191, 191, 191, 191, 191, 191, 191,
|
||
- 191, 197, 201, 200, 92, 222, 92, 92, 92, 236,
|
||
- 203, 203, 290, 152, 152, 441, 204, 92, 292, 237,
|
||
- 239, 235, 240, 92, 191, 191, 191, 163, 163, 163,
|
||
- 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||
- 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||
- 163, 223, 223, 223, 223, 223, 223, 223, 223, 223,
|
||
-
|
||
- 223, 223, 223, 92, 92, 92, 92, 256, 258, 257,
|
||
- 92, 273, 92, 301, 92, 92, 92, 259, 92, 92,
|
||
- 92, 238, 92, 304, 158, 92, 223, 223, 223, 224,
|
||
- 224, 241, 272, 152, 152, 275, 293, 274, 92, 305,
|
||
- 273, 92, 225, 226, 152, 276, 92, 92, 227, 92,
|
||
- 307, 92, 314, 314, 92, 320, 92, 327, 327, 318,
|
||
- 319, 92, 314, 314, 440, 92, 274, 308, 228, 229,
|
||
- 230, 92, 309, 341, 334, 231, 332, 232, 92, 388,
|
||
- 337, 92, 92, 233, 92, 234, 255, 255, 255, 255,
|
||
- 255, 255, 255, 255, 255, 255, 255, 255, 338, 343,
|
||
-
|
||
- 333, 344, 389, 92, 361, 439, 339, 92, 350, 92,
|
||
- 92, 340, 340, 352, 362, 363, 301, 92, 437, 92,
|
||
- 92, 255, 255, 255, 260, 260, 260, 260, 260, 260,
|
||
- 260, 260, 260, 260, 260, 260, 354, 375, 390, 92,
|
||
- 376, 92, 364, 377, 355, 369, 92, 92, 152, 356,
|
||
- 356, 365, 92, 92, 392, 92, 436, 378, 92, 260,
|
||
- 260, 260, 277, 277, 277, 277, 277, 277, 277, 277,
|
||
- 277, 277, 277, 277, 379, 92, 399, 401, 400, 92,
|
||
- 92, 408, 92, 435, 152, 92, 434, 391, 92, 409,
|
||
- 409, 92, 411, 92, 427, 410, 426, 277, 277, 277,
|
||
-
|
||
- 286, 286, 286, 286, 286, 286, 286, 286, 286, 286,
|
||
- 286, 286, 414, 418, 92, 92, 420, 420, 418, 418,
|
||
- 425, 415, 421, 422, 422, 92, 429, 419, 424, 152,
|
||
- 92, 429, 429, 417, 152, 286, 286, 286, 287, 287,
|
||
- 287, 287, 287, 287, 287, 287, 287, 287, 287, 287,
|
||
- 420, 420, 422, 422, 422, 422, 421, 431, 431, 431,
|
||
- 431, 431, 431, 413, 432, 412, 432, 407, 432, 406,
|
||
- 404, 403, 402, 287, 287, 287, 288, 288, 288, 288,
|
||
- 288, 288, 288, 288, 288, 288, 288, 288, 398, 397,
|
||
- 395, 394, 393, 387, 386, 385, 382, 381, 380, 374,
|
||
-
|
||
- 373, 372, 301, 301, 368, 367, 366, 358, 357, 304,
|
||
- 349, 288, 288, 288, 310, 310, 310, 310, 310, 310,
|
||
- 310, 310, 310, 310, 310, 310, 348, 301, 301, 301,
|
||
- 347, 346, 345, 336, 331, 330, 329, 328, 301, 325,
|
||
- 324, 301, 301, 323, 322, 321, 315, 313, 312, 310,
|
||
- 310, 310, 316, 316, 316, 316, 316, 316, 316, 316,
|
||
- 316, 316, 316, 316, 306, 303, 302, 299, 298, 297,
|
||
- 296, 295, 294, 289, 285, 284, 283, 282, 281, 280,
|
||
- 279, 271, 268, 267, 266, 265, 264, 316, 316, 316,
|
||
- 351, 351, 351, 351, 351, 351, 351, 351, 351, 351,
|
||
-
|
||
- 351, 351, 263, 262, 261, 254, 253, 252, 251, 250,
|
||
- 249, 248, 247, 246, 245, 244, 243, 217, 216, 215,
|
||
- 214, 213, 212, 211, 210, 351, 351, 351, 359, 359,
|
||
- 359, 359, 359, 359, 359, 359, 359, 359, 359, 359,
|
||
- 209, 208, 207, 206, 205, 180, 179, 178, 177, 176,
|
||
- 175, 174, 173, 172, 169, 168, 167, 118, 118, 100,
|
||
- 140, 92, 90, 359, 359, 359, 360, 360, 360, 360,
|
||
- 360, 360, 360, 360, 360, 360, 360, 360, 139, 444,
|
||
- 138, 444, 137, 136, 133, 132, 131, 130, 129, 128,
|
||
- 127, 126, 444, 125, 124, 123, 122, 118, 101, 100,
|
||
-
|
||
- 97, 360, 360, 360, 370, 370, 370, 370, 370, 370,
|
||
- 370, 370, 370, 370, 370, 370, 90, 89, 88, 87,
|
||
- 81, 80, 79, 78, 77, 66, 64, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 370,
|
||
- 370, 370, 383, 383, 383, 383, 383, 383, 383, 383,
|
||
- 383, 383, 383, 383, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 383, 383, 383,
|
||
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
|
||
- 430, 430, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
-
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 430, 430, 430, 16, 16,
|
||
- 16, 16, 16, 16, 16, 16, 16, 54, 54, 54,
|
||
- 54, 54, 54, 54, 54, 54, 60, 60, 60, 60,
|
||
- 60, 60, 60, 60, 60, 63, 63, 63, 63, 63,
|
||
- 63, 63, 63, 63, 65, 65, 65, 65, 65, 65,
|
||
- 65, 65, 65, 67, 67, 444, 67, 83, 83, 83,
|
||
- 83, 83, 83, 83, 83, 83, 86, 86, 86, 86,
|
||
- 86, 86, 86, 86, 86, 92, 92, 92, 92, 92,
|
||
- 92, 92, 92, 92, 99, 99, 99, 99, 99, 99,
|
||
-
|
||
- 99, 444, 99, 119, 444, 444, 444, 444, 444, 444,
|
||
- 119, 120, 120, 444, 120, 444, 120, 120, 120, 120,
|
||
- 121, 121, 121, 121, 121, 121, 121, 121, 121, 166,
|
||
- 166, 444, 166, 444, 166, 166, 166, 166, 202, 202,
|
||
- 444, 202, 444, 202, 202, 202, 202, 242, 242, 444,
|
||
- 242, 444, 242, 242, 242, 242, 278, 278, 444, 278,
|
||
- 444, 278, 278, 278, 278, 255, 255, 255, 255, 255,
|
||
- 444, 444, 255, 311, 311, 444, 311, 444, 311, 311,
|
||
- 311, 311, 286, 286, 286, 286, 286, 444, 444, 286,
|
||
- 287, 287, 287, 287, 287, 444, 444, 287, 288, 288,
|
||
-
|
||
- 288, 288, 288, 444, 444, 288, 326, 326, 326, 326,
|
||
- 326, 444, 444, 326, 335, 335, 444, 335, 444, 335,
|
||
- 335, 335, 335, 316, 316, 316, 316, 316, 444, 444,
|
||
- 316, 353, 353, 444, 353, 444, 353, 353, 353, 353,
|
||
- 371, 371, 444, 371, 444, 371, 371, 371, 371, 359,
|
||
- 359, 359, 359, 359, 444, 444, 359, 360, 360, 360,
|
||
- 360, 360, 444, 444, 360, 384, 384, 444, 384, 444,
|
||
- 384, 384, 384, 384, 396, 396, 444, 396, 444, 396,
|
||
- 396, 396, 396, 405, 405, 444, 405, 444, 405, 405,
|
||
- 405, 405, 416, 416, 444, 416, 444, 416, 416, 416,
|
||
-
|
||
- 416, 423, 423, 444, 444, 444, 423, 444, 423, 428,
|
||
- 428, 444, 428, 444, 428, 428, 428, 428, 433, 433,
|
||
- 433, 444, 433, 433, 444, 433, 438, 438, 444, 438,
|
||
- 444, 438, 438, 438, 438, 430, 430, 430, 430, 430,
|
||
- 444, 444, 430, 9, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
-
|
||
- 444, 444, 444, 444, 444, 444
|
||
- } ;
|
||
-
|
||
-static yyconst flex_int16_t yy_chk[1507] =
|
||
- { 0,
|
||
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||
- 1, 1, 1, 4, 5, 6, 4, 7, 8, 18,
|
||
- 0, 5, 6, 13, 7, 8, 25, 25, 26, 26,
|
||
- 27, 35, 35, 11, 11, 1, 1, 1, 2, 2,
|
||
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||
- 2, 11, 12, 12, 2, 37, 13, 36, 36, 37,
|
||
- 18, 42, 39, 27, 43, 44, 45, 46, 48, 159,
|
||
- 12, 39, 96, 2, 2, 2, 52, 52, 95, 52,
|
||
- 96, 2, 44, 95, 43, 48, 159, 42, 82, 82,
|
||
- 43, 45, 46, 2, 2, 2, 3, 3, 3, 3,
|
||
-
|
||
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||
- 3, 3, 3, 3, 3, 3, 3, 3, 17, 17,
|
||
- 47, 17, 49, 50, 51, 60, 69, 58, 58, 75,
|
||
- 83, 94, 94, 17, 97, 49, 17, 97, 102, 79,
|
||
- 98, 104, 103, 50, 50, 58, 47, 68, 68, 51,
|
||
- 68, 98, 49, 50, 79, 103, 105, 443, 60, 106,
|
||
-
|
||
- 17, 107, 68, 83, 102, 68, 104, 69, 108, 109,
|
||
- 75, 110, 17, 17, 17, 33, 33, 33, 33, 33,
|
||
- 33, 33, 33, 33, 33, 33, 33, 107, 105, 68,
|
||
- 106, 107, 140, 108, 128, 140, 442, 111, 109, 110,
|
||
- 112, 68, 68, 68, 114, 233, 233, 115, 113, 128,
|
||
- 33, 33, 33, 34, 34, 34, 34, 34, 34, 34,
|
||
- 34, 34, 34, 34, 34, 111, 112, 113, 115, 141,
|
||
- 116, 116, 114, 116, 144, 142, 115, 142, 144, 34,
|
||
- 141, 167, 167, 113, 257, 441, 257, 167, 34, 34,
|
||
- 34, 119, 119, 119, 119, 119, 119, 119, 119, 119,
|
||
-
|
||
- 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
|
||
- 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
|
||
- 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
|
||
- 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
|
||
- 119, 119, 119, 119, 119, 119, 119, 119, 119, 119,
|
||
- 119, 119, 119, 143, 143, 143, 143, 143, 143, 143,
|
||
- 143, 143, 143, 143, 143, 153, 149, 151, 182, 318,
|
||
- 155, 267, 318, 156, 182, 267, 147, 148, 149, 151,
|
||
- 146, 150, 154, 155, 153, 156, 157, 160, 143, 143,
|
||
- 143, 145, 145, 145, 145, 145, 145, 145, 145, 145,
|
||
-
|
||
- 145, 145, 145, 146, 147, 150, 148, 181, 183, 290,
|
||
- 185, 440, 154, 290, 157, 187, 158, 161, 160, 181,
|
||
- 183, 162, 185, 188, 190, 194, 145, 145, 145, 152,
|
||
- 152, 152, 152, 152, 152, 152, 152, 152, 152, 152,
|
||
- 152, 158, 162, 161, 193, 187, 195, 199, 198, 194,
|
||
- 203, 203, 256, 188, 190, 439, 203, 256, 258, 195,
|
||
- 198, 193, 199, 258, 152, 152, 152, 163, 163, 163,
|
||
- 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||
- 163, 163, 163, 163, 163, 163, 163, 163, 163, 163,
|
||
- 163, 189, 189, 189, 189, 189, 189, 189, 189, 189,
|
||
-
|
||
- 189, 189, 189, 196, 197, 201, 200, 218, 220, 219,
|
||
- 221, 236, 237, 437, 239, 240, 235, 221, 238, 218,
|
||
- 220, 197, 219, 270, 201, 236, 189, 189, 189, 192,
|
||
- 192, 200, 235, 196, 238, 237, 259, 236, 272, 270,
|
||
- 273, 259, 192, 192, 239, 240, 274, 291, 192, 307,
|
||
- 272, 275, 284, 284, 273, 293, 292, 301, 301, 291,
|
||
- 292, 309, 314, 314, 436, 293, 273, 274, 192, 192,
|
||
- 192, 308, 275, 317, 309, 192, 307, 192, 332, 375,
|
||
- 314, 317, 375, 192, 334, 192, 217, 217, 217, 217,
|
||
- 217, 217, 217, 217, 217, 217, 217, 217, 315, 319,
|
||
-
|
||
- 308, 320, 376, 376, 341, 435, 315, 319, 332, 341,
|
||
- 320, 315, 315, 334, 342, 343, 434, 350, 427, 342,
|
||
- 343, 217, 217, 217, 222, 222, 222, 222, 222, 222,
|
||
- 222, 222, 222, 222, 222, 222, 336, 361, 377, 344,
|
||
- 362, 377, 344, 363, 336, 350, 361, 362, 388, 336,
|
||
- 336, 344, 363, 364, 379, 379, 426, 364, 388, 222,
|
||
- 222, 222, 241, 241, 241, 241, 241, 241, 241, 241,
|
||
- 241, 241, 241, 241, 365, 378, 389, 391, 390, 365,
|
||
- 392, 399, 391, 425, 392, 389, 424, 378, 390, 400,
|
||
- 400, 399, 401, 401, 415, 400, 414, 241, 241, 241,
|
||
-
|
||
- 251, 251, 251, 251, 251, 251, 251, 251, 251, 251,
|
||
- 251, 251, 404, 407, 400, 408, 409, 409, 407, 407,
|
||
- 413, 404, 409, 410, 410, 411, 417, 408, 412, 411,
|
||
- 419, 417, 417, 406, 419, 251, 251, 251, 252, 252,
|
||
- 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
|
||
- 420, 420, 421, 421, 422, 422, 420, 423, 423, 431,
|
||
- 431, 433, 433, 403, 423, 402, 431, 398, 433, 397,
|
||
- 395, 394, 393, 252, 252, 252, 253, 253, 253, 253,
|
||
- 253, 253, 253, 253, 253, 253, 253, 253, 387, 385,
|
||
- 382, 381, 380, 374, 373, 372, 368, 367, 366, 358,
|
||
-
|
||
- 357, 354, 349, 348, 347, 346, 345, 338, 337, 331,
|
||
- 330, 253, 253, 253, 276, 276, 276, 276, 276, 276,
|
||
- 276, 276, 276, 276, 276, 276, 329, 328, 325, 324,
|
||
- 323, 322, 321, 312, 306, 305, 304, 303, 302, 300,
|
||
- 299, 298, 297, 296, 295, 294, 285, 283, 279, 276,
|
||
- 276, 276, 289, 289, 289, 289, 289, 289, 289, 289,
|
||
- 289, 289, 289, 289, 271, 269, 268, 266, 265, 264,
|
||
- 263, 262, 261, 254, 250, 249, 247, 246, 245, 244,
|
||
- 243, 234, 232, 231, 230, 229, 228, 289, 289, 289,
|
||
- 333, 333, 333, 333, 333, 333, 333, 333, 333, 333,
|
||
-
|
||
- 333, 333, 227, 226, 225, 216, 215, 214, 213, 212,
|
||
- 211, 210, 209, 208, 207, 206, 205, 180, 179, 178,
|
||
- 177, 176, 175, 174, 173, 333, 333, 333, 339, 339,
|
||
- 339, 339, 339, 339, 339, 339, 339, 339, 339, 339,
|
||
- 172, 171, 170, 169, 168, 137, 136, 135, 134, 133,
|
||
- 132, 131, 130, 129, 127, 126, 124, 118, 117, 99,
|
||
- 93, 92, 90, 339, 339, 339, 340, 340, 340, 340,
|
||
- 340, 340, 340, 340, 340, 340, 340, 340, 88, 87,
|
||
- 86, 85, 81, 80, 78, 77, 76, 74, 73, 72,
|
||
- 71, 70, 64, 63, 59, 57, 56, 53, 41, 40,
|
||
-
|
||
- 38, 340, 340, 340, 352, 352, 352, 352, 352, 352,
|
||
- 352, 352, 352, 352, 352, 352, 32, 31, 30, 29,
|
||
- 23, 22, 21, 20, 19, 15, 14, 9, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 352,
|
||
- 352, 352, 369, 369, 369, 369, 369, 369, 369, 369,
|
||
- 369, 369, 369, 369, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 0, 0, 369, 369, 369,
|
||
- 418, 418, 418, 418, 418, 418, 418, 418, 418, 418,
|
||
- 418, 418, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
-
|
||
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||
- 0, 0, 0, 0, 0, 418, 418, 418, 445, 445,
|
||
- 445, 445, 445, 445, 445, 445, 445, 446, 446, 446,
|
||
- 446, 446, 446, 446, 446, 446, 447, 447, 447, 447,
|
||
- 447, 447, 447, 447, 447, 448, 448, 448, 448, 448,
|
||
- 448, 448, 448, 448, 449, 449, 449, 449, 449, 449,
|
||
- 449, 449, 449, 450, 450, 0, 450, 451, 451, 451,
|
||
- 451, 451, 451, 451, 451, 451, 452, 452, 452, 452,
|
||
- 452, 452, 452, 452, 452, 453, 453, 453, 453, 453,
|
||
- 453, 453, 453, 453, 454, 454, 454, 454, 454, 454,
|
||
-
|
||
- 454, 0, 454, 455, 0, 0, 0, 0, 0, 0,
|
||
- 455, 456, 456, 0, 456, 0, 456, 456, 456, 456,
|
||
- 457, 457, 457, 457, 457, 457, 457, 457, 457, 458,
|
||
- 458, 0, 458, 0, 458, 458, 458, 458, 459, 459,
|
||
- 0, 459, 0, 459, 459, 459, 459, 460, 460, 0,
|
||
- 460, 0, 460, 460, 460, 460, 461, 461, 0, 461,
|
||
- 0, 461, 461, 461, 461, 462, 462, 462, 462, 462,
|
||
- 0, 0, 462, 463, 463, 0, 463, 0, 463, 463,
|
||
- 463, 463, 464, 464, 464, 464, 464, 0, 0, 464,
|
||
- 465, 465, 465, 465, 465, 0, 0, 465, 466, 466,
|
||
-
|
||
- 466, 466, 466, 0, 0, 466, 467, 467, 467, 467,
|
||
- 467, 0, 0, 467, 468, 468, 0, 468, 0, 468,
|
||
- 468, 468, 468, 469, 469, 469, 469, 469, 0, 0,
|
||
- 469, 470, 470, 0, 470, 0, 470, 470, 470, 470,
|
||
- 471, 471, 0, 471, 0, 471, 471, 471, 471, 472,
|
||
- 472, 472, 472, 472, 0, 0, 472, 473, 473, 473,
|
||
- 473, 473, 0, 0, 473, 474, 474, 0, 474, 0,
|
||
- 474, 474, 474, 474, 475, 475, 0, 475, 0, 475,
|
||
- 475, 475, 475, 476, 476, 0, 476, 0, 476, 476,
|
||
- 476, 476, 477, 477, 0, 477, 0, 477, 477, 477,
|
||
-
|
||
- 477, 478, 478, 0, 0, 0, 478, 0, 478, 479,
|
||
- 479, 0, 479, 0, 479, 479, 479, 479, 480, 480,
|
||
- 480, 0, 480, 480, 0, 480, 481, 481, 0, 481,
|
||
- 0, 481, 481, 481, 481, 482, 482, 482, 482, 482,
|
||
- 0, 0, 482, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
- 444, 444, 444, 444, 444, 444, 444, 444, 444, 444,
|
||
-
|
||
- 444, 444, 444, 444, 444, 444
|
||
- } ;
|
||
-
|
||
-static yy_state_type yy_last_accepting_state;
|
||
-static char *yy_last_accepting_cpos;
|
||
-
|
||
-extern int yy_flex_debug;
|
||
-int yy_flex_debug = 0;
|
||
-
|
||
-/* The intent behind this definition is that it'll catch
|
||
- * any uses of REJECT which flex missed.
|
||
- */
|
||
-#define REJECT reject_used_but_not_detected
|
||
-#define yymore() yymore_used_but_not_detected
|
||
-#define YY_MORE_ADJ 0
|
||
-#define YY_RESTORE_YY_MORE_OFFSET
|
||
-char *yytext;
|
||
-#line 1 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-/* -*- indented-text -*- */
|
||
-/* Process source files and output type information.
|
||
- Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009
|
||
- Free Software Foundation, Inc.
|
||
-
|
||
-This file is part of GCC.
|
||
-
|
||
-GCC is free software; you can redistribute it and/or modify it under
|
||
-the terms of the GNU General Public License as published by the Free
|
||
-Software Foundation; either version 3, or (at your option) any later
|
||
-version.
|
||
-
|
||
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
-for more details.
|
||
-
|
||
-You should have received a copy of the GNU General Public License
|
||
-along with GCC; see the file COPYING3. If not see
|
||
-<http://www.gnu.org/licenses/>. */
|
||
-#line 23 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-#include "bconfig.h"
|
||
-#include "system.h"
|
||
-
|
||
-#define malloc xmalloc
|
||
-#define realloc xrealloc
|
||
-
|
||
-#include "gengtype.h"
|
||
-
|
||
-#define YY_NO_INPUT
|
||
-#define YY_DECL int yylex (const char **yylval)
|
||
-#define yyterminate() return EOF_TOKEN
|
||
-
|
||
-struct fileloc lexer_line;
|
||
-int lexer_toplevel_done;
|
||
-
|
||
-static void
|
||
-update_lineno (const char *l, size_t len)
|
||
-{
|
||
- while (len-- > 0)
|
||
- if (*l++ == '\n')
|
||
- lexer_line.line++;
|
||
-}
|
||
-
|
||
-
|
||
-#line 986 "gengtype-lex.c"
|
||
-
|
||
-#define INITIAL 0
|
||
-#define in_struct 1
|
||
-#define in_struct_comment 2
|
||
-#define in_comment 3
|
||
-
|
||
-#ifndef YY_NO_UNISTD_H
|
||
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
|
||
- * down here because we want the user's section 1 to have been scanned first.
|
||
- * The user has a chance to override it with an option.
|
||
- */
|
||
-#include <unistd.h>
|
||
-#endif
|
||
-
|
||
-#ifndef YY_EXTRA_TYPE
|
||
-#define YY_EXTRA_TYPE void *
|
||
-#endif
|
||
-
|
||
-static int yy_init_globals (void );
|
||
-
|
||
-/* Accessor methods to globals.
|
||
- These are made visible to non-reentrant scanners for convenience. */
|
||
-
|
||
-int yylex_destroy (void );
|
||
-
|
||
-int yyget_debug (void );
|
||
-
|
||
-void yyset_debug (int debug_flag );
|
||
-
|
||
-YY_EXTRA_TYPE yyget_extra (void );
|
||
-
|
||
-void yyset_extra (YY_EXTRA_TYPE user_defined );
|
||
-
|
||
-FILE *yyget_in (void );
|
||
-
|
||
-void yyset_in (FILE * in_str );
|
||
-
|
||
-FILE *yyget_out (void );
|
||
-
|
||
-void yyset_out (FILE * out_str );
|
||
-
|
||
-int yyget_leng (void );
|
||
-
|
||
-char *yyget_text (void );
|
||
-
|
||
-int yyget_lineno (void );
|
||
-
|
||
-void yyset_lineno (int line_number );
|
||
-
|
||
-/* Macros after this point can all be overridden by user definitions in
|
||
- * section 1.
|
||
- */
|
||
-
|
||
-#ifndef YY_SKIP_YYWRAP
|
||
-#ifdef __cplusplus
|
||
-extern "C" int yywrap (void );
|
||
-#else
|
||
-extern int yywrap (void );
|
||
-#endif
|
||
-#endif
|
||
-
|
||
-#ifndef yytext_ptr
|
||
-static void yy_flex_strncpy (char *,yyconst char *,int );
|
||
-#endif
|
||
-
|
||
-#ifdef YY_NEED_STRLEN
|
||
-static int yy_flex_strlen (yyconst char * );
|
||
-#endif
|
||
-
|
||
-#ifndef YY_NO_INPUT
|
||
-
|
||
-#ifdef __cplusplus
|
||
-static int yyinput (void );
|
||
-#else
|
||
-static int input (void );
|
||
-#endif
|
||
-
|
||
-#endif
|
||
-
|
||
-/* Amount of stuff to slurp up with each read. */
|
||
-#ifndef YY_READ_BUF_SIZE
|
||
-#define YY_READ_BUF_SIZE 8192
|
||
-#endif
|
||
-
|
||
-/* Copy whatever the last rule matched to the standard output. */
|
||
-#ifndef ECHO
|
||
-/* This used to be an fputs(), but since the string might contain NUL's,
|
||
- * we now use fwrite().
|
||
- */
|
||
-#define ECHO fwrite( yytext, yyleng, 1, yyout )
|
||
-#endif
|
||
-
|
||
-/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
|
||
- * is returned in "result".
|
||
- */
|
||
-#ifndef YY_INPUT
|
||
-#define YY_INPUT(buf,result,max_size) \
|
||
- if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
|
||
- { \
|
||
- int c = '*'; \
|
||
- unsigned n; \
|
||
- for ( n = 0; n < max_size && \
|
||
- (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
|
||
- buf[n] = (char) c; \
|
||
- if ( c == '\n' ) \
|
||
- buf[n++] = (char) c; \
|
||
- if ( c == EOF && ferror( yyin ) ) \
|
||
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
|
||
- result = n; \
|
||
- } \
|
||
- else \
|
||
- { \
|
||
- errno=0; \
|
||
- while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
|
||
- { \
|
||
- if( errno != EINTR) \
|
||
- { \
|
||
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
|
||
- break; \
|
||
- } \
|
||
- errno=0; \
|
||
- clearerr(yyin); \
|
||
- } \
|
||
- }\
|
||
-\
|
||
-
|
||
-#endif
|
||
-
|
||
-/* No semi-colon after return; correct usage is to write "yyterminate();" -
|
||
- * we don't want an extra ';' after the "return" because that will cause
|
||
- * some compilers to complain about unreachable statements.
|
||
- */
|
||
-#ifndef yyterminate
|
||
-#define yyterminate() return YY_NULL
|
||
-#endif
|
||
-
|
||
-/* Number of entries by which start-condition stack grows. */
|
||
-#ifndef YY_START_STACK_INCR
|
||
-#define YY_START_STACK_INCR 25
|
||
-#endif
|
||
-
|
||
-/* Report a fatal error. */
|
||
-#ifndef YY_FATAL_ERROR
|
||
-#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
|
||
-#endif
|
||
-
|
||
-/* end tables serialization structures and prototypes */
|
||
-
|
||
-/* Default declaration of generated scanner - a define so the user can
|
||
- * easily add parameters.
|
||
- */
|
||
-#ifndef YY_DECL
|
||
-#define YY_DECL_IS_OURS 1
|
||
-
|
||
-extern int yylex (void);
|
||
-
|
||
-#define YY_DECL int yylex (void)
|
||
-#endif /* !YY_DECL */
|
||
-
|
||
-/* Code executed at the beginning of each rule, after yytext and yyleng
|
||
- * have been set up.
|
||
- */
|
||
-#ifndef YY_USER_ACTION
|
||
-#define YY_USER_ACTION
|
||
-#endif
|
||
-
|
||
-/* Code executed at the end of each rule. */
|
||
-#ifndef YY_BREAK
|
||
-#define YY_BREAK break;
|
||
-#endif
|
||
-
|
||
-#define YY_RULE_SETUP \
|
||
- if ( yyleng > 0 ) \
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = \
|
||
- (yytext[yyleng - 1] == '\n'); \
|
||
- YY_USER_ACTION
|
||
-
|
||
-/** The main scanner function which does all the work.
|
||
- */
|
||
-YY_DECL
|
||
-{
|
||
- register yy_state_type yy_current_state;
|
||
- register char *yy_cp, *yy_bp;
|
||
- register int yy_act;
|
||
-
|
||
-#line 58 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-
|
||
- /* Do this on entry to yylex(): */
|
||
- *yylval = 0;
|
||
- if (lexer_toplevel_done)
|
||
- {
|
||
- BEGIN(INITIAL);
|
||
- lexer_toplevel_done = 0;
|
||
- }
|
||
-
|
||
- /* Things we look for in skipping mode: */
|
||
-#line 1183 "gengtype-lex.c"
|
||
-
|
||
- if ( !(yy_init) )
|
||
- {
|
||
- (yy_init) = 1;
|
||
-
|
||
-#ifdef YY_USER_INIT
|
||
- YY_USER_INIT;
|
||
-#endif
|
||
-
|
||
- if ( ! (yy_start) )
|
||
- (yy_start) = 1; /* first start state */
|
||
-
|
||
- if ( ! yyin )
|
||
- yyin = stdin;
|
||
-
|
||
- if ( ! yyout )
|
||
- yyout = stdout;
|
||
-
|
||
- if ( ! YY_CURRENT_BUFFER ) {
|
||
- yyensure_buffer_stack ();
|
||
- YY_CURRENT_BUFFER_LVALUE =
|
||
- yy_create_buffer(yyin,YY_BUF_SIZE );
|
||
- }
|
||
-
|
||
- yy_load_buffer_state( );
|
||
- }
|
||
-
|
||
- while ( 1 ) /* loops until end-of-file is reached */
|
||
- {
|
||
- yy_cp = (yy_c_buf_p);
|
||
-
|
||
- /* Support of yytext. */
|
||
- *yy_cp = (yy_hold_char);
|
||
-
|
||
- /* yy_bp points to the position in yy_ch_buf of the start of
|
||
- * the current run.
|
||
- */
|
||
- yy_bp = yy_cp;
|
||
-
|
||
- yy_current_state = (yy_start);
|
||
- yy_current_state += YY_AT_BOL();
|
||
-yy_match:
|
||
- do
|
||
- {
|
||
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
|
||
- if ( yy_accept[yy_current_state] )
|
||
- {
|
||
- (yy_last_accepting_state) = yy_current_state;
|
||
- (yy_last_accepting_cpos) = yy_cp;
|
||
- }
|
||
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
|
||
- {
|
||
- yy_current_state = (int) yy_def[yy_current_state];
|
||
- if ( yy_current_state >= 445 )
|
||
- yy_c = yy_meta[(unsigned int) yy_c];
|
||
- }
|
||
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
|
||
- ++yy_cp;
|
||
- }
|
||
- while ( yy_current_state != 444 );
|
||
- yy_cp = (yy_last_accepting_cpos);
|
||
- yy_current_state = (yy_last_accepting_state);
|
||
-
|
||
-yy_find_action:
|
||
- yy_act = yy_accept[yy_current_state];
|
||
-
|
||
- YY_DO_BEFORE_ACTION;
|
||
-
|
||
-do_action: /* This label is used only to access EOF actions. */
|
||
-
|
||
- switch ( yy_act )
|
||
- { /* beginning of action switch */
|
||
- case 0: /* must back up */
|
||
- /* undo the effects of YY_DO_BEFORE_ACTION */
|
||
- *yy_cp = (yy_hold_char);
|
||
- yy_cp = (yy_last_accepting_cpos);
|
||
- yy_current_state = (yy_last_accepting_state);
|
||
- goto yy_find_action;
|
||
-
|
||
-case 1:
|
||
-/* rule 1 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 69 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return TYPEDEF;
|
||
-}
|
||
- YY_BREAK
|
||
-case 2:
|
||
-/* rule 2 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 73 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return STRUCT;
|
||
-}
|
||
- YY_BREAK
|
||
-case 3:
|
||
-/* rule 3 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 77 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return UNION;
|
||
-}
|
||
- YY_BREAK
|
||
-case 4:
|
||
-/* rule 4 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 81 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return EXTERN;
|
||
-}
|
||
- YY_BREAK
|
||
-case 5:
|
||
-/* rule 5 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 85 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return STATIC;
|
||
-}
|
||
- YY_BREAK
|
||
-case 6:
|
||
-/* rule 6 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 90 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return DEFVEC_OP;
|
||
-}
|
||
- YY_BREAK
|
||
-case 7:
|
||
-/* rule 7 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 94 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return DEFVEC_I;
|
||
-}
|
||
- YY_BREAK
|
||
-case 8:
|
||
-/* rule 8 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 98 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- BEGIN(in_struct);
|
||
- return DEFVEC_ALLOC;
|
||
-}
|
||
- YY_BREAK
|
||
-
|
||
-
|
||
-case 9:
|
||
-YY_RULE_SETUP
|
||
-#line 106 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ BEGIN(in_struct_comment); }
|
||
- YY_BREAK
|
||
-case 10:
|
||
-/* rule 10 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 108 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ update_lineno (yytext, yyleng); }
|
||
- YY_BREAK
|
||
-case 11:
|
||
-/* rule 11 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 109 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ lexer_line.line++; }
|
||
- YY_BREAK
|
||
-case 12:
|
||
-/* rule 12 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 5;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 111 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-/* don't care */
|
||
- YY_BREAK
|
||
-case 13:
|
||
-/* rule 13 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 3;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 112 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return GTY_TOKEN; }
|
||
- YY_BREAK
|
||
-case 14:
|
||
-/* rule 14 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 3;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 113 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return VEC_TOKEN; }
|
||
- YY_BREAK
|
||
-case 15:
|
||
-/* rule 15 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 5;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 114 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return UNION; }
|
||
- YY_BREAK
|
||
-case 16:
|
||
-/* rule 16 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 6;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 115 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return STRUCT; }
|
||
- YY_BREAK
|
||
-case 17:
|
||
-/* rule 17 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 4;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 116 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return ENUM; }
|
||
- YY_BREAK
|
||
-case 18:
|
||
-/* rule 18 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 9;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 117 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return PTR_ALIAS; }
|
||
- YY_BREAK
|
||
-case 19:
|
||
-/* rule 19 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 10;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 118 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return NESTED_PTR; }
|
||
- YY_BREAK
|
||
-case 20:
|
||
-YY_RULE_SETUP
|
||
-#line 119 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return NUM; }
|
||
- YY_BREAK
|
||
-case 21:
|
||
-/* rule 21 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 120 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- *yylval = XDUPVAR (const char, yytext, yyleng, yyleng+1);
|
||
- return PARAM_IS;
|
||
-}
|
||
- YY_BREAK
|
||
-case 22:
|
||
-/* rule 22 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-#line 126 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-case 23:
|
||
-/* rule 23 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 126 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- size_t len;
|
||
-
|
||
- for (len = yyleng; ISSPACE (yytext[len-1]); len--)
|
||
- ;
|
||
-
|
||
- *yylval = XDUPVAR (const char, yytext, len, len+1);
|
||
- update_lineno (yytext, yyleng);
|
||
- return SCALAR;
|
||
-}
|
||
- YY_BREAK
|
||
-case 24:
|
||
-/* rule 24 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp -= 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 138 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- *yylval = XDUPVAR (const char, yytext, yyleng, yyleng+1);
|
||
- return ID;
|
||
-}
|
||
- YY_BREAK
|
||
-case 25:
|
||
-/* rule 25 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 143 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng-1);
|
||
- return STRING;
|
||
-}
|
||
- YY_BREAK
|
||
-/* This "terminal" avoids having to parse integer constant expressions. */
|
||
-case 26:
|
||
-/* rule 26 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 148 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng-1);
|
||
- return ARRAY;
|
||
-}
|
||
- YY_BREAK
|
||
-case 27:
|
||
-/* rule 27 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 152 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng);
|
||
- return CHAR;
|
||
-}
|
||
- YY_BREAK
|
||
-case 28:
|
||
-YY_RULE_SETUP
|
||
-#line 157 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return ELLIPSIS; }
|
||
- YY_BREAK
|
||
-case 29:
|
||
-YY_RULE_SETUP
|
||
-#line 158 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ return yytext[0]; }
|
||
- YY_BREAK
|
||
-/* ignore pp-directives */
|
||
-case 30:
|
||
-/* rule 30 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 161 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{lexer_line.line++;}
|
||
- YY_BREAK
|
||
-case 31:
|
||
-YY_RULE_SETUP
|
||
-#line 163 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- error_at_line (&lexer_line, "unexpected character `%s'", yytext);
|
||
-}
|
||
- YY_BREAK
|
||
-
|
||
-case 32:
|
||
-YY_RULE_SETUP
|
||
-#line 168 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ BEGIN(in_comment); }
|
||
- YY_BREAK
|
||
-case 33:
|
||
-/* rule 33 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 169 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ lexer_line.line++; }
|
||
- YY_BREAK
|
||
-case 34:
|
||
-#line 171 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-case 35:
|
||
-/* rule 35 can match eol */
|
||
-#line 172 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-case 36:
|
||
-/* rule 36 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 172 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-/* do nothing */
|
||
- YY_BREAK
|
||
-case 37:
|
||
-/* rule 37 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 173 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ update_lineno (yytext, yyleng); }
|
||
- YY_BREAK
|
||
-case 38:
|
||
-/* rule 38 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 174 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-/* do nothing */
|
||
- YY_BREAK
|
||
-
|
||
-case 39:
|
||
-/* rule 39 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 177 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ lexer_line.line++; }
|
||
- YY_BREAK
|
||
-case 40:
|
||
-#line 179 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-case 41:
|
||
-YY_RULE_SETUP
|
||
-#line 179 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-/* do nothing */
|
||
- YY_BREAK
|
||
-case 42:
|
||
-/* rule 42 can match eol */
|
||
-*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
|
||
-(yy_c_buf_p) = yy_cp = yy_bp + 1;
|
||
-YY_DO_BEFORE_ACTION; /* set up yytext again */
|
||
-YY_RULE_SETUP
|
||
-#line 180 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-/* do nothing */
|
||
- YY_BREAK
|
||
-
|
||
-case 43:
|
||
-YY_RULE_SETUP
|
||
-#line 182 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ BEGIN(INITIAL); }
|
||
- YY_BREAK
|
||
-case 44:
|
||
-YY_RULE_SETUP
|
||
-#line 183 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{ BEGIN(in_struct); }
|
||
- YY_BREAK
|
||
-case 45:
|
||
-#line 186 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-case 46:
|
||
-YY_RULE_SETUP
|
||
-#line 186 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- error_at_line (&lexer_line,
|
||
- "unterminated comment or string; unexpected EOF");
|
||
-}
|
||
- YY_BREAK
|
||
-case 47:
|
||
-/* rule 47 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 191 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-/* do nothing */
|
||
- YY_BREAK
|
||
-case 48:
|
||
-/* rule 48 can match eol */
|
||
-YY_RULE_SETUP
|
||
-#line 192 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-{
|
||
- error_at_line (&lexer_line, "stray GTY marker");
|
||
-}
|
||
- YY_BREAK
|
||
-case 49:
|
||
-YY_RULE_SETUP
|
||
-#line 196 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-YY_FATAL_ERROR( "flex scanner jammed" );
|
||
- YY_BREAK
|
||
-#line 1653 "gengtype-lex.c"
|
||
-case YY_STATE_EOF(INITIAL):
|
||
-case YY_STATE_EOF(in_struct):
|
||
-case YY_STATE_EOF(in_struct_comment):
|
||
-case YY_STATE_EOF(in_comment):
|
||
- yyterminate();
|
||
-
|
||
- case YY_END_OF_BUFFER:
|
||
- {
|
||
- /* Amount of text matched not including the EOB char. */
|
||
- int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
|
||
-
|
||
- /* Undo the effects of YY_DO_BEFORE_ACTION. */
|
||
- *yy_cp = (yy_hold_char);
|
||
- YY_RESTORE_YY_MORE_OFFSET
|
||
-
|
||
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
|
||
- {
|
||
- /* We're scanning a new file or input source. It's
|
||
- * possible that this happened because the user
|
||
- * just pointed yyin at a new source and called
|
||
- * yylex(). If so, then we have to assure
|
||
- * consistency between YY_CURRENT_BUFFER and our
|
||
- * globals. Here is the right place to do so, because
|
||
- * this is the first action (other than possibly a
|
||
- * back-up) that will match for the new input source.
|
||
- */
|
||
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
|
||
- }
|
||
-
|
||
- /* Note that here we test for yy_c_buf_p "<=" to the position
|
||
- * of the first EOB in the buffer, since yy_c_buf_p will
|
||
- * already have been incremented past the NUL character
|
||
- * (since all states make transitions on EOB to the
|
||
- * end-of-buffer state). Contrast this with the test
|
||
- * in input().
|
||
- */
|
||
- if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
|
||
- { /* This was really a NUL. */
|
||
- yy_state_type yy_next_state;
|
||
-
|
||
- (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
|
||
-
|
||
- yy_current_state = yy_get_previous_state( );
|
||
-
|
||
- /* Okay, we're now positioned to make the NUL
|
||
- * transition. We couldn't have
|
||
- * yy_get_previous_state() go ahead and do it
|
||
- * for us because it doesn't know how to deal
|
||
- * with the possibility of jamming (and we don't
|
||
- * want to build jamming into it because then it
|
||
- * will run more slowly).
|
||
- */
|
||
-
|
||
- yy_next_state = yy_try_NUL_trans( yy_current_state );
|
||
-
|
||
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
|
||
-
|
||
- if ( yy_next_state )
|
||
- {
|
||
- /* Consume the NUL. */
|
||
- yy_cp = ++(yy_c_buf_p);
|
||
- yy_current_state = yy_next_state;
|
||
- goto yy_match;
|
||
- }
|
||
-
|
||
- else
|
||
- {
|
||
- yy_cp = (yy_last_accepting_cpos);
|
||
- yy_current_state = (yy_last_accepting_state);
|
||
- goto yy_find_action;
|
||
- }
|
||
- }
|
||
-
|
||
- else switch ( yy_get_next_buffer( ) )
|
||
- {
|
||
- case EOB_ACT_END_OF_FILE:
|
||
- {
|
||
- (yy_did_buffer_switch_on_eof) = 0;
|
||
-
|
||
- if ( yywrap( ) )
|
||
- {
|
||
- /* Note: because we've taken care in
|
||
- * yy_get_next_buffer() to have set up
|
||
- * yytext, we can now set up
|
||
- * yy_c_buf_p so that if some total
|
||
- * hoser (like flex itself) wants to
|
||
- * call the scanner after we return the
|
||
- * YY_NULL, it'll still work - another
|
||
- * YY_NULL will get returned.
|
||
- */
|
||
- (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
|
||
-
|
||
- yy_act = YY_STATE_EOF(YY_START);
|
||
- goto do_action;
|
||
- }
|
||
-
|
||
- else
|
||
- {
|
||
- if ( ! (yy_did_buffer_switch_on_eof) )
|
||
- YY_NEW_FILE;
|
||
- }
|
||
- break;
|
||
- }
|
||
-
|
||
- case EOB_ACT_CONTINUE_SCAN:
|
||
- (yy_c_buf_p) =
|
||
- (yytext_ptr) + yy_amount_of_matched_text;
|
||
-
|
||
- yy_current_state = yy_get_previous_state( );
|
||
-
|
||
- yy_cp = (yy_c_buf_p);
|
||
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
|
||
- goto yy_match;
|
||
-
|
||
- case EOB_ACT_LAST_MATCH:
|
||
- (yy_c_buf_p) =
|
||
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
|
||
-
|
||
- yy_current_state = yy_get_previous_state( );
|
||
-
|
||
- yy_cp = (yy_c_buf_p);
|
||
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
|
||
- goto yy_find_action;
|
||
- }
|
||
- break;
|
||
- }
|
||
-
|
||
- default:
|
||
- YY_FATAL_ERROR(
|
||
- "fatal flex scanner internal error--no action found" );
|
||
- } /* end of action switch */
|
||
- } /* end of scanning one token */
|
||
-} /* end of yylex */
|
||
-
|
||
-/* yy_get_next_buffer - try to read in a new buffer
|
||
- *
|
||
- * Returns a code representing an action:
|
||
- * EOB_ACT_LAST_MATCH -
|
||
- * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
|
||
- * EOB_ACT_END_OF_FILE - end of file
|
||
- */
|
||
-static int yy_get_next_buffer (void)
|
||
-{
|
||
- register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
|
||
- register char *source = (yytext_ptr);
|
||
- register int number_to_move, i;
|
||
- int ret_val;
|
||
-
|
||
- if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
|
||
- YY_FATAL_ERROR(
|
||
- "fatal flex scanner internal error--end of buffer missed" );
|
||
-
|
||
- if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
|
||
- { /* Don't try to fill the buffer, so this is an EOF. */
|
||
- if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
|
||
- {
|
||
- /* We matched a single character, the EOB, so
|
||
- * treat this as a final EOF.
|
||
- */
|
||
- return EOB_ACT_END_OF_FILE;
|
||
- }
|
||
-
|
||
- else
|
||
- {
|
||
- /* We matched some text prior to the EOB, first
|
||
- * process it.
|
||
- */
|
||
- return EOB_ACT_LAST_MATCH;
|
||
- }
|
||
- }
|
||
-
|
||
- /* Try to read more data. */
|
||
-
|
||
- /* First move last chars to start of buffer. */
|
||
- number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
|
||
-
|
||
- for ( i = 0; i < number_to_move; ++i )
|
||
- *(dest++) = *(source++);
|
||
-
|
||
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
|
||
- /* don't do the read, it's not guaranteed to return an EOF,
|
||
- * just force an EOF
|
||
- */
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
|
||
-
|
||
- else
|
||
- {
|
||
- int num_to_read =
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
|
||
-
|
||
- while ( num_to_read <= 0 )
|
||
- { /* Not enough room in the buffer - grow it. */
|
||
-
|
||
- /* just a shorter name for the current buffer */
|
||
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
|
||
-
|
||
- int yy_c_buf_p_offset =
|
||
- (int) ((yy_c_buf_p) - b->yy_ch_buf);
|
||
-
|
||
- if ( b->yy_is_our_buffer )
|
||
- {
|
||
- int new_size = b->yy_buf_size * 2;
|
||
-
|
||
- if ( new_size <= 0 )
|
||
- b->yy_buf_size += b->yy_buf_size / 8;
|
||
- else
|
||
- b->yy_buf_size *= 2;
|
||
-
|
||
- b->yy_ch_buf = (char *)
|
||
- /* Include room in for 2 EOB chars. */
|
||
- yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
|
||
- }
|
||
- else
|
||
- /* Can't grow it, we don't own it. */
|
||
- b->yy_ch_buf = 0;
|
||
-
|
||
- if ( ! b->yy_ch_buf )
|
||
- YY_FATAL_ERROR(
|
||
- "fatal error - scanner input buffer overflow" );
|
||
-
|
||
- (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
|
||
-
|
||
- num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
|
||
- number_to_move - 1;
|
||
-
|
||
- }
|
||
-
|
||
- if ( num_to_read > YY_READ_BUF_SIZE )
|
||
- num_to_read = YY_READ_BUF_SIZE;
|
||
-
|
||
- /* Read in more data. */
|
||
- YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
|
||
- (yy_n_chars), (size_t) num_to_read );
|
||
-
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
|
||
- }
|
||
-
|
||
- if ( (yy_n_chars) == 0 )
|
||
- {
|
||
- if ( number_to_move == YY_MORE_ADJ )
|
||
- {
|
||
- ret_val = EOB_ACT_END_OF_FILE;
|
||
- yyrestart(yyin );
|
||
- }
|
||
-
|
||
- else
|
||
- {
|
||
- ret_val = EOB_ACT_LAST_MATCH;
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
|
||
- YY_BUFFER_EOF_PENDING;
|
||
- }
|
||
- }
|
||
-
|
||
- else
|
||
- ret_val = EOB_ACT_CONTINUE_SCAN;
|
||
-
|
||
- if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
|
||
- /* Extend the array by 50%, plus the number we really need. */
|
||
- yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
|
||
- if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
|
||
- YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
|
||
- }
|
||
-
|
||
- (yy_n_chars) += number_to_move;
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
|
||
-
|
||
- (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
|
||
-
|
||
- return ret_val;
|
||
-}
|
||
-
|
||
-/* yy_get_previous_state - get the state just before the EOB char was reached */
|
||
-
|
||
- static yy_state_type yy_get_previous_state (void)
|
||
-{
|
||
- register yy_state_type yy_current_state;
|
||
- register char *yy_cp;
|
||
-
|
||
- yy_current_state = (yy_start);
|
||
- yy_current_state += YY_AT_BOL();
|
||
-
|
||
- for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
|
||
- {
|
||
- register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
|
||
- if ( yy_accept[yy_current_state] )
|
||
- {
|
||
- (yy_last_accepting_state) = yy_current_state;
|
||
- (yy_last_accepting_cpos) = yy_cp;
|
||
- }
|
||
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
|
||
- {
|
||
- yy_current_state = (int) yy_def[yy_current_state];
|
||
- if ( yy_current_state >= 445 )
|
||
- yy_c = yy_meta[(unsigned int) yy_c];
|
||
- }
|
||
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
|
||
- }
|
||
-
|
||
- return yy_current_state;
|
||
-}
|
||
-
|
||
-/* yy_try_NUL_trans - try to make a transition on the NUL character
|
||
- *
|
||
- * synopsis
|
||
- * next_state = yy_try_NUL_trans( current_state );
|
||
- */
|
||
- static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
|
||
-{
|
||
- register int yy_is_jam;
|
||
- register char *yy_cp = (yy_c_buf_p);
|
||
-
|
||
- register YY_CHAR yy_c = 1;
|
||
- if ( yy_accept[yy_current_state] )
|
||
- {
|
||
- (yy_last_accepting_state) = yy_current_state;
|
||
- (yy_last_accepting_cpos) = yy_cp;
|
||
- }
|
||
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
|
||
- {
|
||
- yy_current_state = (int) yy_def[yy_current_state];
|
||
- if ( yy_current_state >= 445 )
|
||
- yy_c = yy_meta[(unsigned int) yy_c];
|
||
- }
|
||
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
|
||
- yy_is_jam = (yy_current_state == 444);
|
||
-
|
||
- return yy_is_jam ? 0 : yy_current_state;
|
||
-}
|
||
-
|
||
-#ifndef YY_NO_INPUT
|
||
-#ifdef __cplusplus
|
||
- static int yyinput (void)
|
||
-#else
|
||
- static int input (void)
|
||
-#endif
|
||
-
|
||
-{
|
||
- int c;
|
||
-
|
||
- *(yy_c_buf_p) = (yy_hold_char);
|
||
-
|
||
- if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
|
||
- {
|
||
- /* yy_c_buf_p now points to the character we want to return.
|
||
- * If this occurs *before* the EOB characters, then it's a
|
||
- * valid NUL; if not, then we've hit the end of the buffer.
|
||
- */
|
||
- if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
|
||
- /* This was really a NUL. */
|
||
- *(yy_c_buf_p) = '\0';
|
||
-
|
||
- else
|
||
- { /* need more input */
|
||
- int offset = (yy_c_buf_p) - (yytext_ptr);
|
||
- ++(yy_c_buf_p);
|
||
-
|
||
- switch ( yy_get_next_buffer( ) )
|
||
- {
|
||
- case EOB_ACT_LAST_MATCH:
|
||
- /* This happens because yy_g_n_b()
|
||
- * sees that we've accumulated a
|
||
- * token and flags that we need to
|
||
- * try matching the token before
|
||
- * proceeding. But for input(),
|
||
- * there's no matching to consider.
|
||
- * So convert the EOB_ACT_LAST_MATCH
|
||
- * to EOB_ACT_END_OF_FILE.
|
||
- */
|
||
-
|
||
- /* Reset buffer status. */
|
||
- yyrestart(yyin );
|
||
-
|
||
- /*FALLTHROUGH*/
|
||
-
|
||
- case EOB_ACT_END_OF_FILE:
|
||
- {
|
||
- if ( yywrap( ) )
|
||
- return EOF;
|
||
-
|
||
- if ( ! (yy_did_buffer_switch_on_eof) )
|
||
- YY_NEW_FILE;
|
||
-#ifdef __cplusplus
|
||
- return yyinput();
|
||
-#else
|
||
- return input();
|
||
-#endif
|
||
- }
|
||
-
|
||
- case EOB_ACT_CONTINUE_SCAN:
|
||
- (yy_c_buf_p) = (yytext_ptr) + offset;
|
||
- break;
|
||
- }
|
||
- }
|
||
- }
|
||
-
|
||
- c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
|
||
- *(yy_c_buf_p) = '\0'; /* preserve yytext */
|
||
- (yy_hold_char) = *++(yy_c_buf_p);
|
||
-
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = (c == '\n');
|
||
-
|
||
- return c;
|
||
-}
|
||
-#endif /* ifndef YY_NO_INPUT */
|
||
-
|
||
-/** Immediately switch to a different input stream.
|
||
- * @param input_file A readable stream.
|
||
- *
|
||
- * @note This function does not reset the start condition to @c INITIAL .
|
||
- */
|
||
- void yyrestart (FILE * input_file )
|
||
-{
|
||
-
|
||
- if ( ! YY_CURRENT_BUFFER ){
|
||
- yyensure_buffer_stack ();
|
||
- YY_CURRENT_BUFFER_LVALUE =
|
||
- yy_create_buffer(yyin,YY_BUF_SIZE );
|
||
- }
|
||
-
|
||
- yy_init_buffer(YY_CURRENT_BUFFER,input_file );
|
||
- yy_load_buffer_state( );
|
||
-}
|
||
-
|
||
-/** Switch to a different input buffer.
|
||
- * @param new_buffer The new input buffer.
|
||
- *
|
||
- */
|
||
- void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
|
||
-{
|
||
-
|
||
- /* TODO. We should be able to replace this entire function body
|
||
- * with
|
||
- * yypop_buffer_state();
|
||
- * yypush_buffer_state(new_buffer);
|
||
- */
|
||
- yyensure_buffer_stack ();
|
||
- if ( YY_CURRENT_BUFFER == new_buffer )
|
||
- return;
|
||
-
|
||
- if ( YY_CURRENT_BUFFER )
|
||
- {
|
||
- /* Flush out information for old buffer. */
|
||
- *(yy_c_buf_p) = (yy_hold_char);
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
|
||
- }
|
||
-
|
||
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
|
||
- yy_load_buffer_state( );
|
||
-
|
||
- /* We don't actually know whether we did this switch during
|
||
- * EOF (yywrap()) processing, but the only time this flag
|
||
- * is looked at is after yywrap() is called, so it's safe
|
||
- * to go ahead and always set it.
|
||
- */
|
||
- (yy_did_buffer_switch_on_eof) = 1;
|
||
-}
|
||
-
|
||
-static void yy_load_buffer_state (void)
|
||
-{
|
||
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
|
||
- (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
|
||
- yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
|
||
- (yy_hold_char) = *(yy_c_buf_p);
|
||
-}
|
||
-
|
||
-/** Allocate and initialize an input buffer state.
|
||
- * @param file A readable stream.
|
||
- * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
|
||
- *
|
||
- * @return the allocated buffer state.
|
||
- */
|
||
- YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
|
||
-{
|
||
- YY_BUFFER_STATE b;
|
||
-
|
||
- b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) );
|
||
- if ( ! b )
|
||
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
|
||
-
|
||
- b->yy_buf_size = size;
|
||
-
|
||
- /* yy_ch_buf has to be 2 characters longer than the size given because
|
||
- * we need to put in 2 end-of-buffer characters.
|
||
- */
|
||
- b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 );
|
||
- if ( ! b->yy_ch_buf )
|
||
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
|
||
-
|
||
- b->yy_is_our_buffer = 1;
|
||
-
|
||
- yy_init_buffer(b,file );
|
||
-
|
||
- return b;
|
||
-}
|
||
-
|
||
-/** Destroy the buffer.
|
||
- * @param b a buffer created with yy_create_buffer()
|
||
- *
|
||
- */
|
||
- void yy_delete_buffer (YY_BUFFER_STATE b )
|
||
-{
|
||
-
|
||
- if ( ! b )
|
||
- return;
|
||
-
|
||
- if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
|
||
- YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
|
||
-
|
||
- if ( b->yy_is_our_buffer )
|
||
- yyfree((void *) b->yy_ch_buf );
|
||
-
|
||
- yyfree((void *) b );
|
||
-}
|
||
-
|
||
-/* Initializes or reinitializes a buffer.
|
||
- * This function is sometimes called more than once on the same buffer,
|
||
- * such as during a yyrestart() or at EOF.
|
||
- */
|
||
- static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
|
||
-
|
||
-{
|
||
- int oerrno = errno;
|
||
-
|
||
- yy_flush_buffer(b );
|
||
-
|
||
- b->yy_input_file = file;
|
||
- b->yy_fill_buffer = 1;
|
||
-
|
||
- /* If b is the current buffer, then yy_init_buffer was _probably_
|
||
- * called from yyrestart() or through yy_get_next_buffer.
|
||
- * In that case, we don't want to reset the lineno or column.
|
||
- */
|
||
- if (b != YY_CURRENT_BUFFER){
|
||
- b->yy_bs_lineno = 1;
|
||
- b->yy_bs_column = 0;
|
||
- }
|
||
-
|
||
- b->yy_is_interactive = 0;
|
||
-
|
||
- errno = oerrno;
|
||
-}
|
||
-
|
||
-/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
|
||
- * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
|
||
- *
|
||
- */
|
||
- void yy_flush_buffer (YY_BUFFER_STATE b )
|
||
-{
|
||
- if ( ! b )
|
||
- return;
|
||
-
|
||
- b->yy_n_chars = 0;
|
||
-
|
||
- /* We always need two end-of-buffer characters. The first causes
|
||
- * a transition to the end-of-buffer state. The second causes
|
||
- * a jam in that state.
|
||
- */
|
||
- b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
|
||
- b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
|
||
-
|
||
- b->yy_buf_pos = &b->yy_ch_buf[0];
|
||
-
|
||
- b->yy_at_bol = 1;
|
||
- b->yy_buffer_status = YY_BUFFER_NEW;
|
||
-
|
||
- if ( b == YY_CURRENT_BUFFER )
|
||
- yy_load_buffer_state( );
|
||
-}
|
||
-
|
||
-/** Pushes the new state onto the stack. The new state becomes
|
||
- * the current state. This function will allocate the stack
|
||
- * if necessary.
|
||
- * @param new_buffer The new state.
|
||
- *
|
||
- */
|
||
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
|
||
-{
|
||
- if (new_buffer == NULL)
|
||
- return;
|
||
-
|
||
- yyensure_buffer_stack();
|
||
-
|
||
- /* This block is copied from yy_switch_to_buffer. */
|
||
- if ( YY_CURRENT_BUFFER )
|
||
- {
|
||
- /* Flush out information for old buffer. */
|
||
- *(yy_c_buf_p) = (yy_hold_char);
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
|
||
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
|
||
- }
|
||
-
|
||
- /* Only push if top exists. Otherwise, replace top. */
|
||
- if (YY_CURRENT_BUFFER)
|
||
- (yy_buffer_stack_top)++;
|
||
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
|
||
-
|
||
- /* copied from yy_switch_to_buffer. */
|
||
- yy_load_buffer_state( );
|
||
- (yy_did_buffer_switch_on_eof) = 1;
|
||
-}
|
||
-
|
||
-/** Removes and deletes the top of the stack, if present.
|
||
- * The next element becomes the new top.
|
||
- *
|
||
- */
|
||
-void yypop_buffer_state (void)
|
||
-{
|
||
- if (!YY_CURRENT_BUFFER)
|
||
- return;
|
||
-
|
||
- yy_delete_buffer(YY_CURRENT_BUFFER );
|
||
- YY_CURRENT_BUFFER_LVALUE = NULL;
|
||
- if ((yy_buffer_stack_top) > 0)
|
||
- --(yy_buffer_stack_top);
|
||
-
|
||
- if (YY_CURRENT_BUFFER) {
|
||
- yy_load_buffer_state( );
|
||
- (yy_did_buffer_switch_on_eof) = 1;
|
||
- }
|
||
-}
|
||
-
|
||
-/* Allocates the stack if it does not exist.
|
||
- * Guarantees space for at least one push.
|
||
- */
|
||
-static void yyensure_buffer_stack (void)
|
||
-{
|
||
- int num_to_alloc;
|
||
-
|
||
- if (!(yy_buffer_stack)) {
|
||
-
|
||
- /* First allocation is just for 2 elements, since we don't know if this
|
||
- * scanner will even need a stack. We use 2 instead of 1 to avoid an
|
||
- * immediate realloc on the next call.
|
||
- */
|
||
- num_to_alloc = 1;
|
||
- (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
|
||
- (num_to_alloc * sizeof(struct yy_buffer_state*)
|
||
- );
|
||
- if ( ! (yy_buffer_stack) )
|
||
- YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
|
||
-
|
||
- memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
|
||
-
|
||
- (yy_buffer_stack_max) = num_to_alloc;
|
||
- (yy_buffer_stack_top) = 0;
|
||
- return;
|
||
- }
|
||
-
|
||
- if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
|
||
-
|
||
- /* Increase the buffer to prepare for a possible push. */
|
||
- int grow_size = 8 /* arbitrary grow size */;
|
||
-
|
||
- num_to_alloc = (yy_buffer_stack_max) + grow_size;
|
||
- (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
|
||
- ((yy_buffer_stack),
|
||
- num_to_alloc * sizeof(struct yy_buffer_state*)
|
||
- );
|
||
- if ( ! (yy_buffer_stack) )
|
||
- YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
|
||
-
|
||
- /* zero only the new slots.*/
|
||
- memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
|
||
- (yy_buffer_stack_max) = num_to_alloc;
|
||
- }
|
||
-}
|
||
-
|
||
-/** Setup the input buffer state to scan directly from a user-specified character buffer.
|
||
- * @param base the character buffer
|
||
- * @param size the size in bytes of the character buffer
|
||
- *
|
||
- * @return the newly allocated buffer state object.
|
||
- */
|
||
-YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size )
|
||
-{
|
||
- YY_BUFFER_STATE b;
|
||
-
|
||
- if ( size < 2 ||
|
||
- base[size-2] != YY_END_OF_BUFFER_CHAR ||
|
||
- base[size-1] != YY_END_OF_BUFFER_CHAR )
|
||
- /* They forgot to leave room for the EOB's. */
|
||
- return 0;
|
||
-
|
||
- b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) );
|
||
- if ( ! b )
|
||
- YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
|
||
-
|
||
- b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
|
||
- b->yy_buf_pos = b->yy_ch_buf = base;
|
||
- b->yy_is_our_buffer = 0;
|
||
- b->yy_input_file = 0;
|
||
- b->yy_n_chars = b->yy_buf_size;
|
||
- b->yy_is_interactive = 0;
|
||
- b->yy_at_bol = 1;
|
||
- b->yy_fill_buffer = 0;
|
||
- b->yy_buffer_status = YY_BUFFER_NEW;
|
||
-
|
||
- yy_switch_to_buffer(b );
|
||
-
|
||
- return b;
|
||
-}
|
||
-
|
||
-/** Setup the input buffer state to scan a string. The next call to yylex() will
|
||
- * scan from a @e copy of @a str.
|
||
- * @param yystr a NUL-terminated string to scan
|
||
- *
|
||
- * @return the newly allocated buffer state object.
|
||
- * @note If you want to scan bytes that may contain NUL values, then use
|
||
- * yy_scan_bytes() instead.
|
||
- */
|
||
-YY_BUFFER_STATE yy_scan_string (yyconst char * yystr )
|
||
-{
|
||
-
|
||
- return yy_scan_bytes(yystr,strlen(yystr) );
|
||
-}
|
||
-
|
||
-/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
|
||
- * scan from a @e copy of @a bytes.
|
||
- * @param bytes the byte buffer to scan
|
||
- * @param len the number of bytes in the buffer pointed to by @a bytes.
|
||
- *
|
||
- * @return the newly allocated buffer state object.
|
||
- */
|
||
-YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, int _yybytes_len )
|
||
-{
|
||
- YY_BUFFER_STATE b;
|
||
- char *buf;
|
||
- yy_size_t n;
|
||
- int i;
|
||
-
|
||
- /* Get memory for full buffer, including space for trailing EOB's. */
|
||
- n = _yybytes_len + 2;
|
||
- buf = (char *) yyalloc(n );
|
||
- if ( ! buf )
|
||
- YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
|
||
-
|
||
- for ( i = 0; i < _yybytes_len; ++i )
|
||
- buf[i] = yybytes[i];
|
||
-
|
||
- buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
|
||
-
|
||
- b = yy_scan_buffer(buf,n );
|
||
- if ( ! b )
|
||
- YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
|
||
-
|
||
- /* It's okay to grow etc. this buffer, and we should throw it
|
||
- * away when we're done.
|
||
- */
|
||
- b->yy_is_our_buffer = 1;
|
||
-
|
||
- return b;
|
||
-}
|
||
-
|
||
-#ifndef YY_EXIT_FAILURE
|
||
-#define YY_EXIT_FAILURE 2
|
||
-#endif
|
||
-
|
||
-static void yy_fatal_error (yyconst char* msg )
|
||
-{
|
||
- (void) fprintf( stderr, "%s\n", msg );
|
||
- exit( YY_EXIT_FAILURE );
|
||
-}
|
||
-
|
||
-/* Redefine yyless() so it works in section 3 code. */
|
||
-
|
||
-#undef yyless
|
||
-#define yyless(n) \
|
||
- do \
|
||
- { \
|
||
- /* Undo effects of setting up yytext. */ \
|
||
- int yyless_macro_arg = (n); \
|
||
- YY_LESS_LINENO(yyless_macro_arg);\
|
||
- yytext[yyleng] = (yy_hold_char); \
|
||
- (yy_c_buf_p) = yytext + yyless_macro_arg; \
|
||
- (yy_hold_char) = *(yy_c_buf_p); \
|
||
- *(yy_c_buf_p) = '\0'; \
|
||
- yyleng = yyless_macro_arg; \
|
||
- } \
|
||
- while ( 0 )
|
||
-
|
||
-/* Accessor methods (get/set functions) to struct members. */
|
||
-
|
||
-/** Get the current line number.
|
||
- *
|
||
- */
|
||
-int yyget_lineno (void)
|
||
-{
|
||
-
|
||
- return yylineno;
|
||
-}
|
||
-
|
||
-/** Get the input stream.
|
||
- *
|
||
- */
|
||
-FILE *yyget_in (void)
|
||
-{
|
||
- return yyin;
|
||
-}
|
||
-
|
||
-/** Get the output stream.
|
||
- *
|
||
- */
|
||
-FILE *yyget_out (void)
|
||
-{
|
||
- return yyout;
|
||
-}
|
||
-
|
||
-/** Get the length of the current token.
|
||
- *
|
||
- */
|
||
-int yyget_leng (void)
|
||
-{
|
||
- return yyleng;
|
||
-}
|
||
-
|
||
-/** Get the current token.
|
||
- *
|
||
- */
|
||
-
|
||
-char *yyget_text (void)
|
||
-{
|
||
- return yytext;
|
||
-}
|
||
-
|
||
-/** Set the current line number.
|
||
- * @param line_number
|
||
- *
|
||
- */
|
||
-void yyset_lineno (int line_number )
|
||
-{
|
||
-
|
||
- yylineno = line_number;
|
||
-}
|
||
-
|
||
-/** Set the input stream. This does not discard the current
|
||
- * input buffer.
|
||
- * @param in_str A readable stream.
|
||
- *
|
||
- * @see yy_switch_to_buffer
|
||
- */
|
||
-void yyset_in (FILE * in_str )
|
||
-{
|
||
- yyin = in_str ;
|
||
-}
|
||
-
|
||
-void yyset_out (FILE * out_str )
|
||
-{
|
||
- yyout = out_str ;
|
||
-}
|
||
-
|
||
-int yyget_debug (void)
|
||
-{
|
||
- return yy_flex_debug;
|
||
-}
|
||
-
|
||
-void yyset_debug (int bdebug )
|
||
-{
|
||
- yy_flex_debug = bdebug ;
|
||
-}
|
||
-
|
||
-static int yy_init_globals (void)
|
||
-{
|
||
- /* Initialization is the same as for the non-reentrant scanner.
|
||
- * This function is called from yylex_destroy(), so don't allocate here.
|
||
- */
|
||
-
|
||
- (yy_buffer_stack) = 0;
|
||
- (yy_buffer_stack_top) = 0;
|
||
- (yy_buffer_stack_max) = 0;
|
||
- (yy_c_buf_p) = (char *) 0;
|
||
- (yy_init) = 0;
|
||
- (yy_start) = 0;
|
||
-
|
||
-/* Defined in main.c */
|
||
-#ifdef YY_STDINIT
|
||
- yyin = stdin;
|
||
- yyout = stdout;
|
||
-#else
|
||
- yyin = (FILE *) 0;
|
||
- yyout = (FILE *) 0;
|
||
-#endif
|
||
-
|
||
- /* For future reference: Set errno on error, since we are called by
|
||
- * yylex_init()
|
||
- */
|
||
- return 0;
|
||
-}
|
||
-
|
||
-/* yylex_destroy is for both reentrant and non-reentrant scanners. */
|
||
-int yylex_destroy (void)
|
||
-{
|
||
-
|
||
- /* Pop the buffer stack, destroying each element. */
|
||
- while(YY_CURRENT_BUFFER){
|
||
- yy_delete_buffer(YY_CURRENT_BUFFER );
|
||
- YY_CURRENT_BUFFER_LVALUE = NULL;
|
||
- yypop_buffer_state();
|
||
- }
|
||
-
|
||
- /* Destroy the stack itself. */
|
||
- yyfree((yy_buffer_stack) );
|
||
- (yy_buffer_stack) = NULL;
|
||
-
|
||
- /* Reset the globals. This is important in a non-reentrant scanner so the next time
|
||
- * yylex() is called, initialization will occur. */
|
||
- yy_init_globals( );
|
||
-
|
||
- return 0;
|
||
-}
|
||
-
|
||
-/*
|
||
- * Internal utility routines.
|
||
- */
|
||
-
|
||
-#ifndef yytext_ptr
|
||
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
|
||
-{
|
||
- register int i;
|
||
- for ( i = 0; i < n; ++i )
|
||
- s1[i] = s2[i];
|
||
-}
|
||
-#endif
|
||
-
|
||
-#ifdef YY_NEED_STRLEN
|
||
-static int yy_flex_strlen (yyconst char * s )
|
||
-{
|
||
- register int n;
|
||
- for ( n = 0; s[n]; ++n )
|
||
- ;
|
||
-
|
||
- return n;
|
||
-}
|
||
-#endif
|
||
-
|
||
-void *yyalloc (yy_size_t size )
|
||
-{
|
||
- return (void *) malloc( size );
|
||
-}
|
||
-
|
||
-void *yyrealloc (void * ptr, yy_size_t size )
|
||
-{
|
||
- /* The cast to (char *) in the following accommodates both
|
||
- * implementations that use char* generic pointers, and those
|
||
- * that use void* generic pointers. It works with the latter
|
||
- * because both ANSI C and C++ allow castless assignment from
|
||
- * any pointer type to void*, and deal with argument conversions
|
||
- * as though doing an assignment.
|
||
- */
|
||
- return (void *) realloc( (char *) ptr, size );
|
||
-}
|
||
-
|
||
-void yyfree (void * ptr )
|
||
-{
|
||
- free( (char *) ptr ); /* see yyrealloc() for (char *) cast */
|
||
-}
|
||
-
|
||
-#define YYTABLES_NAME "yytables"
|
||
-
|
||
-#line 196 "/d/gcc-4.4.1/gcc-4.4.1/gcc/gengtype-lex.l"
|
||
-
|
||
-
|
||
-
|
||
-void
|
||
-yybegin (const char *fname)
|
||
-{
|
||
- yyin = fopen (fname, "r");
|
||
- if (yyin == NULL)
|
||
- {
|
||
- perror (fname);
|
||
- exit (1);
|
||
- }
|
||
- lexer_line.file = fname;
|
||
- lexer_line.line = 1;
|
||
-}
|
||
-
|
||
-void
|
||
-yyend (void)
|
||
-{
|
||
- fclose (yyin);
|
||
-}
|
||
-
|
||
--- a/gcc/gengtype-lex.l
+++ b/gcc/gengtype-lex.l
@@ -48,7 +48,7 @@
ID [[:alpha:]_][[:alnum:]_]*
WS [[:space:]]+
HWS [ \t\r\v\f]*
-IWORD short|long|(un)?signed|char|int|HOST_WIDE_INT|HOST_WIDEST_INT|bool|size_t|BOOL_BITFIELD|CPPCHAR_SIGNED_T|ino_t|dev_t
+IWORD short|long|(un)?signed|char|int|HOST_WIDE_INT|HOST_WIDEST_INT|bool|size_t|BOOL_BITFIELD|CPPCHAR_SIGNED_T|ino_t|dev_t|HARD_REG_SET
ITYPE {IWORD}({WS}{IWORD})*
EOID [^[:alnum:]_]

--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -1990,6 +1990,23 @@
SCHED_GROUP_P (insn) = 0;
}

+/* Return true if scheduling INSN will finish current clock cycle. */
+static bool
+insn_finishes_cycle_p (rtx insn)
+{
+ if (SCHED_GROUP_P (insn))
+ /* After issuing INSN, rest of the sched_group will be forced to issue
+ in order. Don't make any plans for the rest of cycle. */
+ return true;
+
+ /* Finishing the block will, apparently, finish the cycle. */
+ if (current_sched_info->insn_finishes_block_p
+ && current_sched_info->insn_finishes_block_p (insn))
+ return true;
+
+ return false;
+}
+
/* The following structure describe an entry of the stack of choices. */
struct choice_entry
{
@@ -2168,7 +2185,10 @@
delay = state_transition (state, insn);
if (delay < 0)
{
- if (state_dead_lock_p (state))
+ if (state_dead_lock_p (state)
+ || insn_finishes_cycle_p (insn))
+ /* We won't issue any more instructions in the next
+ choice_state. */
top->rest = 0;
else
top->rest--;
--- a/gcc/hooks.c
+++ b/gcc/hooks.c
@@ -335,3 +335,10 @@
{
return NULL;
}
+
+/* Generic hook that takes a const_tree and returns NULL_TREE. */
+tree
+hook_tree_const_tree_null (const_tree t ATTRIBUTE_UNUSED)
+{
+ return NULL;
+}
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -64,6 +64,8 @@
extern int hook_int_size_t_constcharptr_int_0 (size_t, const char *, int);
extern int hook_int_void_no_regs (void);

+extern tree hook_tree_const_tree_null (const_tree);
+
extern tree hook_tree_tree_tree_null (tree, tree);
extern tree hook_tree_tree_tree_tree_null (tree, tree, tree);
extern tree hook_tree_tree_tree_tree_3rd_identity (tree, tree, tree);
--- a/gcc/incpath.c
+++ b/gcc/incpath.c
@@ -30,6 +30,8 @@
#include "intl.h"
#include "incpath.h"
#include "cppdefault.h"
+#include "flags.h"
+#include "toplev.h"

/* Microsoft Windows does not natively support inodes.
VMS has non-numeric inodes. */
@@ -353,6 +355,24 @@
}
fprintf (stderr, _("End of search list.\n"));
}
+
+#ifdef ENABLE_POISON_SYSTEM_DIRECTORIES
+ if (flag_poison_system_directories)
+ {
+ struct cpp_dir *p;
+
+ for (p = heads[QUOTE]; p; p = p->next)
+ {
+ if ((!strncmp (p->name, "/usr/include", 12))
+ || (!strncmp (p->name, "/usr/local/include", 18))
+ || (!strncmp (p->name, "/usr/X11R6/include", 18)))
+ warning (OPT_Wpoison_system_directories,
+ "include location \"%s\" is unsafe for "
+ "cross-compilation",
+ p->name);
+ }
+ }
+#endif
}

/* Use given -I paths for #include "..." but not #include <...>, and
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -1349,14 +1349,12 @@
return for_each_rtx (&insn, insn_contains_asm_1, NULL);
}

-/* Set up regs_asm_clobbered. */
+/* Add register clobbers from asm statements. */
static void
-compute_regs_asm_clobbered (char *regs_asm_clobbered)
+compute_regs_asm_clobbered (void)
{
basic_block bb;

- memset (regs_asm_clobbered, 0, sizeof (char) * FIRST_PSEUDO_REGISTER);
-
FOR_EACH_BB (bb)
{
rtx insn;
@@ -1377,7 +1375,7 @@
+ hard_regno_nregs[dregno][mode] - 1;

for (i = dregno; i <= end; ++i)
- regs_asm_clobbered[i] = 1;
+ SET_HARD_REG_BIT(crtl->asm_clobbers, i);
}
}
}
@@ -1415,7 +1413,8 @@
COPY_HARD_REG_SET (ira_no_alloc_regs, no_unit_alloc_regs);
CLEAR_HARD_REG_SET (eliminable_regset);

- compute_regs_asm_clobbered (regs_asm_clobbered);
+ compute_regs_asm_clobbered ();
+
/* Build the regset of all eliminable registers and show we can't
use those that we already know won't be eliminated. */
#ifdef ELIMINABLE_REGS
@@ -1425,7 +1424,7 @@
= (! CAN_ELIMINATE (eliminables[i].from, eliminables[i].to)
|| (eliminables[i].to == STACK_POINTER_REGNUM && need_fp));

- if (! regs_asm_clobbered[eliminables[i].from])
+ if (!TEST_HARD_REG_BIT (crtl->asm_clobbers, eliminables[i].from))
{
SET_HARD_REG_BIT (eliminable_regset, eliminables[i].from);

@@ -1439,7 +1438,7 @@
df_set_regs_ever_live (eliminables[i].from, true);
}
#if FRAME_POINTER_REGNUM != HARD_FRAME_POINTER_REGNUM
- if (! regs_asm_clobbered[HARD_FRAME_POINTER_REGNUM])
+ if (!TEST_HARD_REG_BIT (crtl->asm_clobbers, HARD_FRAME_POINTER_REGNUM))
{
SET_HARD_REG_BIT (eliminable_regset, HARD_FRAME_POINTER_REGNUM);
if (need_fp)
@@ -1453,7 +1452,7 @@
#endif

#else
- if (! regs_asm_clobbered[FRAME_POINTER_REGNUM])
+ if (!TEST_HARD_REG_BIT (crtl->asm_clobbers, HARD_FRAME_POINTER_REGNUM))
{
SET_HARD_REG_BIT (eliminable_regset, FRAME_POINTER_REGNUM);
if (need_fp)
--- a/gcc/ira-costs.c
+++ b/gcc/ira-costs.c
@@ -706,11 +706,11 @@

/* Wrapper around REGNO_OK_FOR_INDEX_P, to allow pseudo registers. */
static inline bool
-ok_for_index_p_nonstrict (rtx reg)
+ok_for_index_p_nonstrict (rtx reg, enum machine_mode mode)
{
unsigned regno = REGNO (reg);

- return regno >= FIRST_PSEUDO_REGISTER || REGNO_OK_FOR_INDEX_P (regno);
+ return regno >= FIRST_PSEUDO_REGISTER || ok_for_index_p_1 (regno, mode);
}

/* A version of regno_ok_for_base_p for use here, when all
@@ -748,7 +748,7 @@
enum reg_class rclass;

if (context == 1)
- rclass = INDEX_REG_CLASS;
+ rclass = index_reg_class (mode);
else
rclass = base_reg_class (mode, outer_code, index_code);

@@ -795,7 +795,8 @@
just record registers in any non-constant operands. We
assume here, as well as in the tests below, that all
addresses are in canonical form. */
- else if (INDEX_REG_CLASS == base_reg_class (VOIDmode, PLUS, SCRATCH))
+ else if (index_reg_class (mode)
+ == base_reg_class (mode, PLUS, SCRATCH))
{
record_address_regs (mode, arg0, context, PLUS, code1, scale);
if (! CONSTANT_P (arg1))
@@ -816,7 +817,7 @@
else if (code0 == REG && code1 == REG
&& REGNO (arg0) < FIRST_PSEUDO_REGISTER
&& (ok_for_base_p_nonstrict (arg0, mode, PLUS, REG)
- || ok_for_index_p_nonstrict (arg0)))
+ || ok_for_index_p_nonstrict (arg0, mode)))
record_address_regs (mode, arg1,
ok_for_base_p_nonstrict (arg0, mode, PLUS, REG)
? 1 : 0,
@@ -824,7 +825,7 @@
else if (code0 == REG && code1 == REG
&& REGNO (arg1) < FIRST_PSEUDO_REGISTER
&& (ok_for_base_p_nonstrict (arg1, mode, PLUS, REG)
- || ok_for_index_p_nonstrict (arg1)))
+ || ok_for_index_p_nonstrict (arg1, mode)))
record_address_regs (mode, arg0,
ok_for_base_p_nonstrict (arg1, mode, PLUS, REG)
? 1 : 0,
--- a/gcc/longlong.h
+++ b/gcc/longlong.h
@@ -982,7 +982,7 @@
" or r1,%0" \
: "=r" (q), "=&z" (r) \
: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
- : "r1", "r2", "r4", "r5", "r6", "pr"); \
+ : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
} while (0)

#define UDIV_TIME 80
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1249,6 +1249,7 @@
tree-ssa-loop-manip.o \
tree-ssa-loop-niter.o \
tree-ssa-loop-prefetch.o \
+ tree-ssa-loop-promote.o \
tree-ssa-loop-unswitch.o \
tree-ssa-loop.o \
tree-ssa-math-opts.o \
@@ -1258,6 +1259,7 @@
tree-ssa-pre.o \
tree-ssa-propagate.o \
tree-ssa-reassoc.o \
+ tree-ssa-remove-local-statics.o \
tree-ssa-sccvn.o \
tree-ssa-sink.o \
tree-ssa-structalias.o \
@@ -1674,7 +1676,7 @@
$(MACHMODE_H) $(FPBIT) $(DPBIT) $(TPBIT) $(LIB2ADD) \
$(LIB2ADD_ST) $(LIB2ADDEH) $(srcdir)/emutls.c gcov-iov.h $(SFP_MACHINE)

-libgcc.mvars: config.status Makefile $(LIB2ADD) $(LIB2ADD_ST) specs \
+libgcc.mvars: config.status Makefile $(LIB2ADD) $(LIB2ADD_ST) specs $(tmake_file) \
xgcc$(exeext)
: > tmp-libgcc.mvars
echo LIB1ASMFUNCS = '$(LIB1ASMFUNCS)' >> tmp-libgcc.mvars
@@ -1728,7 +1730,7 @@
# driver program needs to select the library directory based on the
# switches.
multilib.h: s-mlib; @true
-s-mlib: $(srcdir)/genmultilib Makefile
+s-mlib: $(srcdir)/genmultilib Makefile $(tmakefile)
if test @enable_multilib@ = yes \
|| test -n "$(MULTILIB_OSDIRNAMES)"; then \
$(SHELL) $(srcdir)/genmultilib \
@@ -1816,7 +1818,7 @@

incpath.o: incpath.c incpath.h $(CONFIG_H) $(SYSTEM_H) $(CPPLIB_H) \
intl.h prefix.h coretypes.h $(TM_H) cppdefault.h $(TARGET_H) \
- $(MACHMODE_H)
+ $(MACHMODE_H) $(FLAGS_H) toplev.h

c-decl.o : c-decl.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
$(RTL_H) $(C_TREE_H) $(GGC_H) $(TARGET_H) $(FLAGS_H) $(FUNCTION_H) output.h \
@@ -1900,7 +1902,7 @@
$(TREE_H) $(C_PRAGMA_H) $(FLAGS_H) $(TOPLEV_H) langhooks.h \
$(TREE_INLINE_H) $(DIAGNOSTIC_H) intl.h debug.h $(C_COMMON_H) \
opts.h options.h $(MKDEPS_H) incpath.h cppdefault.h $(TARGET_H) \
- $(TM_P_H) $(VARRAY_H)
+ $(TM_P_H) $(VARRAY_H) $(C_TREE_H)
$(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) \
$< $(OUTPUT_OPTION) @TARGET_SYSTEM_ROOT_DEFINE@

@@ -2176,6 +2178,9 @@
alloc-pool.h $(BASIC_BLOCK_H) $(BITMAP_H) $(HASHTAB_H) $(GIMPLE_H) \
$(TREE_INLINE_H) tree-iterator.h tree-ssa-sccvn.h $(PARAMS_H) \
$(DBGCNT_H)
+tree-ssa-remove-local-statics.o: tree-ssa-remove-local-statics.c \
+ coretypes.h $(CONFIG_H) $(SYSTEM_H) $(BASIC_BLOCK_H) tree.h tree-pass.h \
+ $(TM_H) $(HASHTAB_H) $(BASIC_BLOCK_H)
tree-ssa-sccvn.o : tree-ssa-sccvn.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(TREE_H) $(GGC_H) $(DIAGNOSTIC_H) $(TIMEVAR_H) $(FIBHEAP_H) \
$(TM_H) coretypes.h $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) $(CFGLOOP_H) \
@@ -2271,6 +2276,12 @@
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
tree-chrec.h $(TOPLEV_H) langhooks.h $(TREE_INLINE_H) $(TREE_DATA_REF_H) \
$(OPTABS_H)
+tree-ssa-loop-promote.o: tree-ssa-loop-promote.c \
+ coretypes.h $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TOPLEV_H) \
+ $(RTL_H) $(TM_P_H) hard-reg-set.h $(OBSTACK_H) $(BASIC_BLOCK_H) \
+ pointer-set.h intl.h $(TREE_H) $(GIMPLE_H) $(HASHTAB_H) $(DIAGNOSTIC_H) \
+ $(TREE_FLOW_H) $(TREE_DUMP_H) $(CFGLOOP_H) $(FLAGS_H) $(TIMEVAR_H) \
+ tree-pass.h $(TM_H)
tree-predcom.o: tree-predcom.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_P_H) \
$(CFGLOOP_H) $(TREE_FLOW_H) $(GGC_H) $(TREE_DATA_REF_H) $(SCEV_H) \
$(PARAMS_H) $(DIAGNOSTIC_H) tree-pass.h $(TM_H) coretypes.h tree-affine.h \
@@ -2865,7 +2876,7 @@
$(RTL_H) $(REAL_H) $(FLAGS_H) $(EXPR_H) $(OPTABS_H) reload.h $(REGS_H) \
hard-reg-set.h insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) output.h \
$(FUNCTION_H) $(TOPLEV_H) cselib.h $(TM_P_H) except.h $(TREE_H) $(MACHMODE_H) \
- $(OBSTACK_H) $(TIMEVAR_H) tree-pass.h $(DF_H) $(DBGCNT_H)
+ $(OBSTACK_H) $(TIMEVAR_H) tree-pass.h addresses.h $(DF_H) $(DBGCNT_H)
postreload-gcse.o : postreload-gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \
$(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h $(TOPLEV_H) \
@@ -3582,7 +3593,7 @@
# be rebuilt.

# Build the include directories.
-stmp-int-hdrs: $(STMP_FIXINC) $(USER_H) $(UNWIND_H) fixinc_list
+stmp-int-hdrs: $(STMP_FIXINC) $(USER_H) $(UNWIND_H)
# Copy in the headers provided with gcc.
# The sed command gets just the last file name component;
# this is necessary because VPATH could add a dirname.
@@ -3601,21 +3612,23 @@
done
rm -f include/unwind.h
cp $(UNWIND_H) include/unwind.h
- set -e; for ml in `cat fixinc_list`; do \
- sysroot_headers_suffix=`echo $${ml} | sed -e 's/;.*$$//'`; \
- multi_dir=`echo $${ml} | sed -e 's/^[^;]*;//'`; \
- fix_dir=include-fixed$${multi_dir}; \
- if $(LIMITS_H_TEST) ; then \
- cat $(srcdir)/limitx.h $(srcdir)/glimits.h $(srcdir)/limity.h > tmp-xlimits.h; \
- else \
- cat $(srcdir)/glimits.h > tmp-xlimits.h; \
- fi; \
- $(mkinstalldirs) $${fix_dir}; \
- chmod a+rx $${fix_dir} || true; \
- rm -f $${fix_dir}/limits.h; \
- mv tmp-xlimits.h $${fix_dir}/limits.h; \
- chmod a+r $${fix_dir}/limits.h; \
- done
+ set -e; if [ -f fixinc_list ] ; then \
+ for ml in `cat fixinc_list`; do \
+ sysroot_headers_suffix=`echo $${ml} | sed -e 's/;.*$$//'`; \
+ multi_dir=`echo $${ml} | sed -e 's/^[^;]*;//'`; \
+ fix_dir=include-fixed$${multi_dir}; \
+ if $(LIMITS_H_TEST) ; then \
+ cat $(srcdir)/limitx.h $(srcdir)/glimits.h $(srcdir)/limity.h > tmp-xlimits.h; \
+ else \
+ cat $(srcdir)/glimits.h > tmp-xlimits.h; \
+ fi; \
+ $(mkinstalldirs) $${fix_dir}; \
+ chmod a+rx $${fix_dir} || true; \
+ rm -f $${fix_dir}/limits.h; \
+ mv tmp-xlimits.h $${fix_dir}/limits.h; \
+ chmod a+r $${fix_dir}/limits.h; \
+ done; \
+ fi
# Install the README
rm -f include-fixed/README
cp $(srcdir)/../fixincludes/README-fixinc include-fixed/README
@@ -4340,16 +4353,18 @@

# Install supporting files for fixincludes to be run later.
install-mkheaders: stmp-int-hdrs $(STMP_FIXPROTO) install-itoolsdirs \
- macro_list fixinc_list
+ macro_list
$(INSTALL_DATA) $(srcdir)/gsyslimits.h \
$(DESTDIR)$(itoolsdatadir)/gsyslimits.h
$(INSTALL_DATA) macro_list $(DESTDIR)$(itoolsdatadir)/macro_list
- $(INSTALL_DATA) fixinc_list $(DESTDIR)$(itoolsdatadir)/fixinc_list
- set -e; for ml in `cat fixinc_list`; do \
- multi_dir=`echo $${ml} | sed -e 's/^[^;]*;//'`; \
- $(mkinstalldirs) $(DESTDIR)$(itoolsdatadir)/include$${multi_dir}; \
- $(INSTALL_DATA) include-fixed$${multidir}/limits.h $(DESTDIR)$(itoolsdatadir)/include$${multi_dir}/limits.h; \
- done
+ set -e; if [ -f fixinc_list ] ; then \
+ $(INSTALL_DATA) fixinc_list $(DESTDIR)$(itoolsdatadir)/fixinc_list; \
+ for ml in `cat fixinc_list`; do \
+ multi_dir=`echo $${ml} | sed -e 's/^[^;]*;//'`; \
+ $(mkinstalldirs) $(DESTDIR)$(itoolsdatadir)/include$${multi_dir}; \
+ $(INSTALL_DATA) include-fixed$${multidir}/limits.h $(DESTDIR)$(itoolsdatadir)/include$${multi_dir}/limits.h; \
+ done; \
+ fi
$(INSTALL_SCRIPT) $(srcdir)/../mkinstalldirs \
$(DESTDIR)$(itoolsdir)/mkinstalldirs ; \
if [ x$(STMP_FIXPROTO) != x ] ; then \
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -270,6 +270,7 @@
NULL,
sms_print_insn,
NULL,
+ NULL, /* insn_finishes_block_p */
NULL, NULL,
NULL, NULL,
0, 0,
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -3300,7 +3300,8 @@
if (unoptab == ffs_optab || unoptab == clz_optab || unoptab == ctz_optab
|| unoptab == popcount_optab || unoptab == parity_optab)
outmode
- = GET_MODE (hard_libcall_value (TYPE_MODE (integer_type_node)));
+ = GET_MODE (hard_libcall_value (TYPE_MODE (integer_type_node),
+ optab_libfunc (unoptab, mode)));

start_sequence ();

@@ -4357,10 +4358,12 @@
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
- if ((libfunc = optab_libfunc (code_to_optab[comparison], mode)))
+ if (code_to_optab[comparison]
+ && (libfunc = optab_libfunc (code_to_optab[comparison], mode)))
break;

- if ((libfunc = optab_libfunc (code_to_optab[swapped] , mode)))
+ if (code_to_optab[swapped]
+ && (libfunc = optab_libfunc (code_to_optab[swapped], mode)))
{
rtx tmp;
tmp = x; x = y; y = tmp;
@@ -4368,7 +4371,8 @@
break;
}

- if ((libfunc = optab_libfunc (code_to_optab[reversed], mode))
+ if (code_to_optab[reversed]
+ && (libfunc = optab_libfunc (code_to_optab[reversed], mode))
&& FLOAT_LIB_COMPARE_RETURNS_BOOL (mode, reversed))
{
comparison = reversed;
--- a/gcc/opts.c
|
||
+++ b/gcc/opts.c
|
||
@@ -904,7 +904,7 @@
|
||
flag_tree_vrp = opt2;
|
||
flag_tree_builtin_call_dce = opt2;
|
||
flag_tree_pre = opt2;
|
||
- flag_tree_switch_conversion = 1;
|
||
+ flag_tree_switch_conversion = opt2;
|
||
flag_ipa_cp = opt2;
|
||
|
||
/* Allow more virtual operators to increase alias precision. */
|
||
@@ -928,6 +928,7 @@
|
||
flag_gcse_after_reload = opt3;
|
||
flag_tree_vectorize = opt3;
|
||
flag_ipa_cp_clone = opt3;
|
||
+ flag_tree_pre_partial_partial = opt3;
|
||
if (flag_ipa_cp_clone)
|
||
flag_ipa_cp = 1;
|
||
|
||
@@ -951,10 +952,13 @@
|
||
being declared inline. */
|
||
flag_inline_functions = 1;
|
||
|
||
- /* Basic optimization options. */
|
||
- optimize_size = 1;
|
||
+ /* Basic optimization options at -Os are almost the same as -O2. The
|
||
+ only difference is that we disable PRE, because it sometimes still
|
||
+ increases code size.  If the user wants to run PRE with -Os, it
+ will have to be requested explicitly. */
|
||
if (optimize > 2)
|
||
optimize = 2;
|
||
+ flag_tree_pre = 0;
|
||
|
||
/* We want to crossjump as much as possible. */
|
||
set_param_value ("min-crossjump-insns", 1);
|
||
@@ -2060,6 +2064,10 @@
|
||
/* These are no-ops, preserved for backward compatibility. */
|
||
break;
|
||
|
||
+ case OPT_feglibc_:
|
||
+ /* This is a no-op at the moment. */
|
||
+ break;
|
||
+
|
||
default:
|
||
/* If the flag was handled in a standard way, assume the lack of
|
||
processing here is intentional. */
|
||
--- a/gcc/passes.c
|
||
+++ b/gcc/passes.c
|
||
@@ -591,6 +591,7 @@
|
||
NEXT_PASS (pass_rename_ssa_copies);
|
||
NEXT_PASS (pass_complete_unrolli);
|
||
NEXT_PASS (pass_ccp);
|
||
+ NEXT_PASS (pass_promote_indices);
|
||
NEXT_PASS (pass_forwprop);
|
||
/* Ideally the function call conditional
|
||
dead code elimination phase can be delayed
|
||
@@ -605,6 +606,7 @@
|
||
alias information also rewrites no longer addressed
|
||
locals into SSA form if possible. */
|
||
NEXT_PASS (pass_build_alias);
|
||
+ NEXT_PASS (pass_remove_local_statics);
|
||
NEXT_PASS (pass_return_slot);
|
||
NEXT_PASS (pass_phiprop);
|
||
NEXT_PASS (pass_fre);
|
||
--- a/gcc/pointer-set.c
|
||
+++ b/gcc/pointer-set.c
|
||
@@ -181,6 +181,23 @@
|
||
break;
|
||
}
|
||
|
||
+/* Return the number of elements in PSET. */
|
||
+
|
||
+size_t
|
||
+pointer_set_n_elements (struct pointer_set_t *pset)
|
||
+{
|
||
+ return pset->n_elements;
|
||
+}
|
||
+
|
||
+/* Remove all entries from PSET. */
|
||
+
|
||
+void
|
||
+pointer_set_clear (struct pointer_set_t *pset)
|
||
+{
|
||
+ pset->n_elements = 0;
|
||
+ memset (pset->slots, 0, sizeof (pset->slots[0]) * pset->n_slots);
|
||
+}
|
||
+
|
||
|
||
/* A pointer map is represented the same way as a pointer_set, so
|
||
the hash code is based on the address of the key, rather than
|
||
@@ -301,3 +318,20 @@
|
||
if (pmap->keys[i] && !fn (pmap->keys[i], &pmap->values[i], data))
|
||
break;
|
||
}
|
||
+
|
||
+/* Return the number of elements in PMAP. */
|
||
+
|
||
+size_t
|
||
+pointer_map_n_elements (struct pointer_map_t *pmap)
|
||
+{
|
||
+ return pmap->n_elements;
|
||
+}
|
||
+
|
||
+/* Remove all entries from PMAP. */
|
||
+
|
||
+void pointer_map_clear (struct pointer_map_t *pmap)
|
||
+{
|
||
+ pmap->n_elements = 0;
|
||
+ memset (pmap->keys, 0, sizeof (pmap->keys[0]) * pmap->n_slots);
|
||
+ memset (pmap->values, 0, sizeof (pmap->values[0]) * pmap->n_slots);
|
||
+}
|
||
--- a/gcc/pointer-set.h
|
||
+++ b/gcc/pointer-set.h
|
||
@@ -29,6 +29,8 @@
|
||
void pointer_set_traverse (const struct pointer_set_t *,
|
||
bool (*) (const void *, void *),
|
||
void *);
|
||
+size_t pointer_set_n_elements (struct pointer_set_t *);
|
||
+void pointer_set_clear (struct pointer_set_t *);
|
||
|
||
struct pointer_map_t;
|
||
struct pointer_map_t *pointer_map_create (void);
|
||
@@ -38,5 +40,7 @@
|
||
void **pointer_map_insert (struct pointer_map_t *pmap, const void *p);
|
||
void pointer_map_traverse (const struct pointer_map_t *,
|
||
bool (*) (const void *, void **, void *), void *);
|
||
+size_t pointer_map_n_elements (struct pointer_map_t *);
|
||
+void pointer_map_clear (struct pointer_map_t *);
|
||
|
||
#endif /* POINTER_SET_H */
|
||
--- a/gcc/postreload.c
|
||
+++ b/gcc/postreload.c
|
||
@@ -46,6 +46,7 @@
|
||
#include "tree.h"
|
||
#include "timevar.h"
|
||
#include "tree-pass.h"
|
||
+#include "addresses.h"
|
||
#include "df.h"
|
||
#include "dbgcnt.h"
|
||
|
||
@@ -708,17 +709,19 @@
|
||
int last_label_ruid;
|
||
int min_labelno, n_labels;
|
||
HARD_REG_SET ever_live_at_start, *label_live;
|
||
+ enum reg_class index_regs;
|
||
|
||
/* If reg+reg can be used in offsetable memory addresses, the main chunk of
|
||
reload has already used it where appropriate, so there is no use in
|
||
trying to generate it now. */
|
||
- if (double_reg_address_ok && INDEX_REG_CLASS != NO_REGS)
|
||
+ index_regs = index_reg_class (VOIDmode);
|
||
+ if (double_reg_address_ok && index_regs != NO_REGS)
|
||
return;
|
||
|
||
/* To avoid wasting too much time later searching for an index register,
|
||
determine the minimum and maximum index register numbers. */
|
||
for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
|
||
- if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], r))
|
||
+ if (TEST_HARD_REG_BIT (reg_class_contents[index_regs], r))
|
||
{
|
||
if (first_index_reg == -1)
|
||
first_index_reg = r;
|
||
@@ -826,8 +829,8 @@
|
||
substitute uses of REG (typically in MEMs) with.
|
||
First check REG and BASE for being index registers;
|
||
we can use them even if they are not dead. */
|
||
- if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], regno)
|
||
- || TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS],
|
||
+ if (TEST_HARD_REG_BIT (reg_class_contents[index_regs], regno)
|
||
+ || TEST_HARD_REG_BIT (reg_class_contents[index_regs],
|
||
REGNO (base)))
|
||
{
|
||
const_reg = reg;
|
||
@@ -841,8 +844,7 @@
|
||
two registers. */
|
||
for (i = first_index_reg; i <= last_index_reg; i++)
|
||
{
|
||
- if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS],
|
||
- i)
|
||
+ if (TEST_HARD_REG_BIT (reg_class_contents[index_regs], i)
|
||
&& reg_state[i].use_index == RELOAD_COMBINE_MAX_USES
|
||
&& reg_state[i].store_ruid <= reg_state[regno].use_ruid
|
||
&& hard_regno_nregs[i][GET_MODE (reg)] == 1)
|
||
--- a/gcc/real.c
|
||
+++ b/gcc/real.c
|
||
@@ -4513,6 +4513,167 @@
|
||
false
|
||
};
|
||
|
||
+/* Encode half-precision floats. This routine is used both for the IEEE
|
||
+ and ARM alternative encodings. */
|
||
+static void
|
||
+encode_ieee_half (const struct real_format *fmt, long *buf,
|
||
+ const REAL_VALUE_TYPE *r)
|
||
+{
|
||
+ unsigned long image, sig, exp;
|
||
+ unsigned long sign = r->sign;
|
||
+ bool denormal = (r->sig[SIGSZ-1] & SIG_MSB) == 0;
|
||
+
|
||
+ image = sign << 15;
|
||
+ sig = (r->sig[SIGSZ-1] >> (HOST_BITS_PER_LONG - 11)) & 0x3ff;
|
||
+
|
||
+ switch (r->cl)
|
||
+ {
|
||
+ case rvc_zero:
|
||
+ break;
|
||
+
|
||
+ case rvc_inf:
|
||
+ if (fmt->has_inf)
|
||
+ image |= 31 << 10;
|
||
+ else
|
||
+ image |= 0x7fff;
|
||
+ break;
|
||
+
|
||
+ case rvc_nan:
|
||
+ if (fmt->has_nans)
|
||
+ {
|
||
+ if (r->canonical)
|
||
+ sig = (fmt->canonical_nan_lsbs_set ? (1 << 9) - 1 : 0);
|
||
+ if (r->signalling == fmt->qnan_msb_set)
|
||
+ sig &= ~(1 << 9);
|
||
+ else
|
||
+ sig |= 1 << 9;
|
||
+ if (sig == 0)
|
||
+ sig = 1 << 8;
|
||
+
|
||
+ image |= 31 << 10;
|
||
+ image |= sig;
|
||
+ }
|
||
+ else
|
||
+ image |= 0x3ff;
|
||
+ break;
|
||
+
|
||
+ case rvc_normal:
|
||
+ /* Recall that IEEE numbers are interpreted as 1.F x 2**exp,
|
||
+ whereas the intermediate representation is 0.F x 2**exp.
|
||
+ Which means we're off by one. */
|
||
+ if (denormal)
|
||
+ exp = 0;
|
||
+ else
|
||
+ exp = REAL_EXP (r) + 15 - 1;
|
||
+ image |= exp << 10;
|
||
+ image |= sig;
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ gcc_unreachable ();
|
||
+ }
|
||
+
|
||
+ buf[0] = image;
|
||
+}
|
||
+
|
||
+/* Decode half-precision floats. This routine is used both for the IEEE
|
||
+ and ARM alternative encodings. */
|
||
+static void
|
||
+decode_ieee_half (const struct real_format *fmt, REAL_VALUE_TYPE *r,
|
||
+ const long *buf)
|
||
+{
|
||
+ unsigned long image = buf[0] & 0xffff;
|
||
+ bool sign = (image >> 15) & 1;
|
||
+ int exp = (image >> 10) & 0x1f;
|
||
+
|
||
+ memset (r, 0, sizeof (*r));
|
||
+ image <<= HOST_BITS_PER_LONG - 11;
|
||
+ image &= ~SIG_MSB;
|
||
+
|
||
+ if (exp == 0)
|
||
+ {
|
||
+ if (image && fmt->has_denorm)
|
||
+ {
|
||
+ r->cl = rvc_normal;
|
||
+ r->sign = sign;
|
||
+ SET_REAL_EXP (r, -14);
|
||
+ r->sig[SIGSZ-1] = image << 1;
|
||
+ normalize (r);
|
||
+ }
|
||
+ else if (fmt->has_signed_zero)
|
||
+ r->sign = sign;
|
||
+ }
|
||
+ else if (exp == 31 && (fmt->has_nans || fmt->has_inf))
|
||
+ {
|
||
+ if (image)
|
||
+ {
|
||
+ r->cl = rvc_nan;
|
||
+ r->sign = sign;
|
||
+ r->signalling = (((image >> (HOST_BITS_PER_LONG - 2)) & 1)
|
||
+ ^ fmt->qnan_msb_set);
|
||
+ r->sig[SIGSZ-1] = image;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ r->cl = rvc_inf;
|
||
+ r->sign = sign;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ r->cl = rvc_normal;
|
||
+ r->sign = sign;
|
||
+ SET_REAL_EXP (r, exp - 15 + 1);
|
||
+ r->sig[SIGSZ-1] = image | SIG_MSB;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Half-precision format, as specified in IEEE 754R. */
|
||
+const struct real_format ieee_half_format =
|
||
+ {
|
||
+ encode_ieee_half,
|
||
+ decode_ieee_half,
|
||
+ 2,
|
||
+ 11,
|
||
+ 11,
|
||
+ -13,
|
||
+ 16,
|
||
+ 15,
|
||
+ 15,
|
||
+ false,
|
||
+ true,
|
||
+ true,
|
||
+ true,
|
||
+ true,
|
||
+ true,
|
||
+ true,
|
||
+ false
|
||
+ };
|
||
+
|
||
+/* ARM's alternative half-precision format, similar to IEEE but with
|
||
+ no reserved exponent value for NaNs and infinities; rather, it just
|
||
+ extends the range of exponents by one. */
|
||
+const struct real_format arm_half_format =
|
||
+ {
|
||
+ encode_ieee_half,
|
||
+ decode_ieee_half,
|
||
+ 2,
|
||
+ 11,
|
||
+ 11,
|
||
+ -13,
|
||
+ 17,
|
||
+ 15,
|
||
+ 15,
|
||
+ false,
|
||
+ true,
|
||
+ false,
|
||
+ false,
|
||
+ true,
|
||
+ true,
|
||
+ false,
|
||
+ false
|
||
+ };
|
||
+
|
||
/* A synthetic "format" for internal arithmetic. It's the size of the
|
||
internal significand minus the two bits needed for proper rounding.
|
||
The encode and decode routines exist only to satisfy our paranoia
|
||
--- a/gcc/real.h
|
||
+++ b/gcc/real.h
|
||
@@ -304,6 +304,8 @@
|
||
extern const struct real_format decimal_single_format;
|
||
extern const struct real_format decimal_double_format;
|
||
extern const struct real_format decimal_quad_format;
|
||
+extern const struct real_format ieee_half_format;
|
||
+extern const struct real_format arm_half_format;
|
||
|
||
|
||
/* ====================================================================== */
|
||
--- a/gcc/regrename.c
|
||
+++ b/gcc/regrename.c
|
||
@@ -567,14 +567,14 @@
|
||
int index_op;
|
||
unsigned regno0 = REGNO (op0), regno1 = REGNO (op1);
|
||
|
||
- if (REGNO_OK_FOR_INDEX_P (regno1)
|
||
+ if (regno_ok_for_index_p (regno1, mode)
|
||
&& regno_ok_for_base_p (regno0, mode, PLUS, REG))
|
||
index_op = 1;
|
||
- else if (REGNO_OK_FOR_INDEX_P (regno0)
|
||
+ else if (regno_ok_for_index_p (regno0, mode)
|
||
&& regno_ok_for_base_p (regno1, mode, PLUS, REG))
|
||
index_op = 0;
|
||
else if (regno_ok_for_base_p (regno0, mode, PLUS, REG)
|
||
- || REGNO_OK_FOR_INDEX_P (regno1))
|
||
+ || regno_ok_for_index_p (regno1, mode))
|
||
index_op = 1;
|
||
else if (regno_ok_for_base_p (regno1, mode, PLUS, REG))
|
||
index_op = 0;
|
||
@@ -599,7 +599,7 @@
|
||
}
|
||
|
||
if (locI)
|
||
- scan_rtx_address (insn, locI, INDEX_REG_CLASS, action, mode);
|
||
+ scan_rtx_address (insn, locI, index_reg_class (mode), action, mode);
|
||
if (locB)
|
||
scan_rtx_address (insn, locB, base_reg_class (mode, PLUS, index_code),
|
||
action, mode);
|
||
@@ -1488,14 +1488,14 @@
|
||
int index_op;
|
||
unsigned regno0 = REGNO (op0), regno1 = REGNO (op1);
|
||
|
||
- if (REGNO_OK_FOR_INDEX_P (regno1)
|
||
+ if (regno_ok_for_index_p (regno1, mode)
|
||
&& regno_ok_for_base_p (regno0, mode, PLUS, REG))
|
||
index_op = 1;
|
||
- else if (REGNO_OK_FOR_INDEX_P (regno0)
|
||
+ else if (regno_ok_for_index_p (regno0, mode)
|
||
&& regno_ok_for_base_p (regno1, mode, PLUS, REG))
|
||
index_op = 0;
|
||
else if (regno_ok_for_base_p (regno0, mode, PLUS, REG)
|
||
- || REGNO_OK_FOR_INDEX_P (regno1))
|
||
+ || regno_ok_for_index_p (regno1, mode))
|
||
index_op = 1;
|
||
else if (regno_ok_for_base_p (regno1, mode, PLUS, REG))
|
||
index_op = 0;
|
||
@@ -1520,8 +1520,8 @@
|
||
}
|
||
|
||
if (locI)
|
||
- changed |= replace_oldest_value_addr (locI, INDEX_REG_CLASS, mode,
|
||
- insn, vd);
|
||
+ changed |= replace_oldest_value_addr (locI, index_reg_class (mode),
|
||
+ mode, insn, vd);
|
||
if (locB)
|
||
changed |= replace_oldest_value_addr (locB,
|
||
base_reg_class (mode, PLUS,
|
||
--- a/gcc/reload.c
|
||
+++ b/gcc/reload.c
|
||
@@ -5046,7 +5046,7 @@
|
||
loc = &XEXP (*loc, 0);
|
||
}
|
||
|
||
- if (double_reg_address_ok)
|
||
+ if (double_reg_address_ok && index_reg_class (mode) != NO_REGS)
|
||
{
|
||
/* Unshare the sum as well. */
|
||
*loc = ad = copy_rtx (ad);
|
||
@@ -5054,8 +5054,8 @@
|
||
/* Reload the displacement into an index reg.
|
||
We assume the frame pointer or arg pointer is a base reg. */
|
||
find_reloads_address_part (XEXP (ad, 1), &XEXP (ad, 1),
|
||
- INDEX_REG_CLASS, GET_MODE (ad), opnum,
|
||
- type, ind_levels);
|
||
+ index_reg_class (mode), GET_MODE (ad),
|
||
+ opnum, type, ind_levels);
|
||
return 0;
|
||
}
|
||
else
|
||
@@ -5448,13 +5448,13 @@
|
||
#define REG_OK_FOR_CONTEXT(CONTEXT, REGNO, MODE, OUTER, INDEX) \
|
||
((CONTEXT) == 0 \
|
||
? regno_ok_for_base_p (REGNO, MODE, OUTER, INDEX) \
|
||
- : REGNO_OK_FOR_INDEX_P (REGNO))
|
||
+ : regno_ok_for_index_p (REGNO, MODE))
|
||
|
||
enum reg_class context_reg_class;
|
||
RTX_CODE code = GET_CODE (x);
|
||
|
||
if (context == 1)
|
||
- context_reg_class = INDEX_REG_CLASS;
|
||
+ context_reg_class = index_reg_class (mode);
|
||
else
|
||
context_reg_class = base_reg_class (mode, outer_code, index_code);
|
||
|
||
@@ -5546,17 +5546,17 @@
|
||
|
||
else if (code0 == REG && code1 == REG)
|
||
{
|
||
- if (REGNO_OK_FOR_INDEX_P (REGNO (op1))
|
||
+ if (regno_ok_for_index_p (REGNO (op1), mode)
|
||
&& regno_ok_for_base_p (REGNO (op0), mode, PLUS, REG))
|
||
return 0;
|
||
- else if (REGNO_OK_FOR_INDEX_P (REGNO (op0))
|
||
+ else if (regno_ok_for_index_p (REGNO (op0), mode)
|
||
&& regno_ok_for_base_p (REGNO (op1), mode, PLUS, REG))
|
||
return 0;
|
||
else if (regno_ok_for_base_p (REGNO (op0), mode, PLUS, REG))
|
||
find_reloads_address_1 (mode, orig_op1, 1, PLUS, SCRATCH,
|
||
&XEXP (x, 1), opnum, type, ind_levels,
|
||
insn);
|
||
- else if (REGNO_OK_FOR_INDEX_P (REGNO (op1)))
|
||
+ else if (regno_ok_for_index_p (REGNO (op1), mode))
|
||
find_reloads_address_1 (mode, orig_op0, 0, PLUS, REG,
|
||
&XEXP (x, 0), opnum, type, ind_levels,
|
||
insn);
|
||
@@ -5564,7 +5564,7 @@
|
||
find_reloads_address_1 (mode, orig_op0, 1, PLUS, SCRATCH,
|
||
&XEXP (x, 0), opnum, type, ind_levels,
|
||
insn);
|
||
- else if (REGNO_OK_FOR_INDEX_P (REGNO (op0)))
|
||
+ else if (regno_ok_for_index_p (REGNO (op0), mode))
|
||
find_reloads_address_1 (mode, orig_op1, 0, PLUS, REG,
|
||
&XEXP (x, 1), opnum, type, ind_levels,
|
||
insn);
|
||
@@ -5634,7 +5634,7 @@
|
||
need to live longer than a TYPE reload normally would, so be
|
||
conservative and class it as RELOAD_OTHER. */
|
||
if ((REG_P (XEXP (op1, 1))
|
||
- && !REGNO_OK_FOR_INDEX_P (REGNO (XEXP (op1, 1))))
|
||
+ && !regno_ok_for_index_p (REGNO (XEXP (op1, 1)), mode))
|
||
|| GET_CODE (XEXP (op1, 1)) == PLUS)
|
||
find_reloads_address_1 (mode, XEXP (op1, 1), 1, code, SCRATCH,
|
||
&XEXP (op1, 1), opnum, RELOAD_OTHER,
|
||
@@ -6128,18 +6128,26 @@
|
||
/* For some processors an address may be valid in the
|
||
original mode but not in a smaller mode. For
|
||
example, ARM accepts a scaled index register in
|
||
- SImode but not in HImode. Similarly, the address may
|
||
- have been valid before the subreg offset was added,
|
||
- but not afterwards. find_reloads_address
|
||
- assumes that we pass it a valid address, and doesn't
|
||
- force a reload. This will probably be fine if
|
||
- find_reloads_address finds some reloads. But if it
|
||
- doesn't find any, then we may have just converted a
|
||
- valid address into an invalid one. Check for that
|
||
- here. */
|
||
+ SImode but not in HImode. Note that this is only
|
||
+ a problem if the address in reg_equiv_mem is already
|
||
+ invalid in the new mode; other cases would be fixed
|
||
+ by find_reloads_address as usual.
|
||
+
|
||
+ ??? We attempt to handle such cases here by doing an
|
||
+ additional reload of the full address after the
|
||
+ usual processing by find_reloads_address. Note that
|
||
+ this may not work in the general case, but it seems
|
||
+ to cover the cases where this situation currently
|
||
+ occurs. A more general fix might be to reload the
|
||
+ *value* instead of the address, but this would not
|
||
+ be expected by the callers of this routine as-is.
|
||
+
|
||
+ If find_reloads_address already completely replaced
|
||
+ the address, there is nothing further to do. */
|
||
if (reloaded == 0
|
||
- && !strict_memory_address_p (GET_MODE (tem),
|
||
- XEXP (tem, 0)))
|
||
+ && reg_equiv_mem[regno] != 0
|
||
+ && !strict_memory_address_p (GET_MODE (x),
|
||
+ XEXP (reg_equiv_mem[regno], 0)))
|
||
push_reload (XEXP (tem, 0), NULL_RTX, &XEXP (tem, 0), (rtx*) 0,
|
||
base_reg_class (GET_MODE (tem), MEM, SCRATCH),
|
||
GET_MODE (XEXP (tem, 0)), VOIDmode, 0, 0,
|
||
--- a/gcc/rtlanal.c
|
||
+++ b/gcc/rtlanal.c
|
||
@@ -2913,62 +2913,78 @@
|
||
commutative_operand_precedence (rtx op)
|
||
{
|
||
enum rtx_code code = GET_CODE (op);
|
||
+ int value;
|
||
|
||
/* Constants always come the second operand. Prefer "nice" constants. */
|
||
if (code == CONST_INT)
|
||
- return -8;
|
||
- if (code == CONST_DOUBLE)
|
||
- return -7;
|
||
- if (code == CONST_FIXED)
|
||
- return -7;
|
||
- op = avoid_constant_pool_reference (op);
|
||
- code = GET_CODE (op);
|
||
-
|
||
- switch (GET_RTX_CLASS (code))
|
||
- {
|
||
- case RTX_CONST_OBJ:
|
||
- if (code == CONST_INT)
|
||
- return -6;
|
||
- if (code == CONST_DOUBLE)
|
||
- return -5;
|
||
- if (code == CONST_FIXED)
|
||
- return -5;
|
||
- return -4;
|
||
-
|
||
- case RTX_EXTRA:
|
||
- /* SUBREGs of objects should come second. */
|
||
- if (code == SUBREG && OBJECT_P (SUBREG_REG (op)))
|
||
- return -3;
|
||
- return 0;
|
||
+ value = -8;
|
||
+ else if (code == CONST_DOUBLE)
|
||
+ value = -7;
|
||
+ else if (code == CONST_FIXED)
|
||
+ value = -7;
|
||
+ else
|
||
+ {
|
||
+ op = avoid_constant_pool_reference (op);
|
||
+ code = GET_CODE (op);
|
||
+
|
||
+ switch (GET_RTX_CLASS (code))
|
||
+ {
|
||
+ case RTX_CONST_OBJ:
|
||
+ if (code == CONST_INT)
|
||
+ value = -6;
|
||
+ else if (code == CONST_DOUBLE)
|
||
+ value = -5;
|
||
+ else if (code == CONST_FIXED)
|
||
+ value = -5;
|
||
+ else
|
||
+ value = -4;
|
||
+ break;
|
||
+
|
||
+ case RTX_EXTRA:
|
||
+ /* SUBREGs of objects should come second. */
|
||
+ if (code == SUBREG && OBJECT_P (SUBREG_REG (op)))
|
||
+ value = -3;
|
||
+ else
|
||
+ value = 0;
|
||
+ break;
|
||
+
|
||
+ case RTX_OBJ:
|
||
+ /* Complex expressions should be the first, so decrease priority
|
||
+ of objects. */
|
||
+ value = -1;
|
||
+ break;
|
||
|
||
- case RTX_OBJ:
|
||
- /* Complex expressions should be the first, so decrease priority
|
||
- of objects. Prefer pointer objects over non pointer objects. */
|
||
- if ((REG_P (op) && REG_POINTER (op))
|
||
- || (MEM_P (op) && MEM_POINTER (op)))
|
||
- return -1;
|
||
- return -2;
|
||
-
|
||
- case RTX_COMM_ARITH:
|
||
- /* Prefer operands that are themselves commutative to be first.
|
||
- This helps to make things linear. In particular,
|
||
- (and (and (reg) (reg)) (not (reg))) is canonical. */
|
||
- return 4;
|
||
-
|
||
- case RTX_BIN_ARITH:
|
||
- /* If only one operand is a binary expression, it will be the first
|
||
- operand. In particular, (plus (minus (reg) (reg)) (neg (reg)))
|
||
- is canonical, although it will usually be further simplified. */
|
||
- return 2;
|
||
+ case RTX_COMM_ARITH:
|
||
+ /* Prefer operands that are themselves commutative to be first.
|
||
+ This helps to make things linear. In particular,
|
||
+ (and (and (reg) (reg)) (not (reg))) is canonical. */
|
||
+ value = 4;
|
||
+ break;
|
||
+
|
||
+ case RTX_BIN_ARITH:
|
||
+ /* If only one operand is a binary expression, it will be the first
|
||
+ operand. In particular, (plus (minus (reg) (reg)) (neg (reg)))
|
||
+ is canonical, although it will usually be further simplified. */
|
||
+ value = 2;
|
||
+ break;
|
||
|
||
- case RTX_UNARY:
|
||
- /* Then prefer NEG and NOT. */
|
||
- if (code == NEG || code == NOT)
|
||
- return 1;
|
||
+ case RTX_UNARY:
|
||
+ /* Then prefer NEG and NOT. */
|
||
+ if (code == NEG || code == NOT)
|
||
+ value = 1;
|
||
+ else
|
||
+ value = 0;
|
||
+ break;
|
||
|
||
- default:
|
||
- return 0;
|
||
+ default:
|
||
+ value = 0;
|
||
+ }
|
||
}
|
||
+
|
||
+ if (targetm.commutative_operand_precedence)
|
||
+ value = targetm.commutative_operand_precedence (op, value);
|
||
+
|
||
+ return value;
|
||
}
|
||
|
||
/* Return 1 iff it is necessary to swap operands of commutative operation
|
||
--- a/gcc/rtl.def
|
||
+++ b/gcc/rtl.def
|
||
@@ -1088,7 +1088,11 @@
|
||
guard for the bypass. The function will get the two insns as
|
||
parameters. If the function returns zero the bypass will be
|
||
ignored for this case. Additional guard is necessary to recognize
|
||
- complicated bypasses, e.g. when consumer is load address. */
|
||
+ complicated bypasses, e.g. when consumer is load address. If there
|
||
+ is more than one bypass with the same output and input insns, the
+ chosen bypass is the first bypass in the description with a guard
+ whose guard function returns nonzero.  If there is no such bypass,
+ the bypass without a guard function is chosen. */
|
||
DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA)
|
||
|
||
/* (define_automaton string) describes names of automata generated and
|
||
--- a/gcc/sched-ebb.c
|
||
+++ b/gcc/sched-ebb.c
|
||
@@ -286,6 +286,7 @@
|
||
rank,
|
||
ebb_print_insn,
|
||
ebb_contributes_to_priority,
|
||
+ NULL, /* insn_finishes_block_p */
|
||
|
||
NULL, NULL,
|
||
NULL, NULL,
|
||
--- a/gcc/sched-int.h
|
||
+++ b/gcc/sched-int.h
|
||
@@ -558,6 +558,10 @@
|
||
calculations. */
|
||
int (*contributes_to_priority) (rtx, rtx);
|
||
|
||
+ /* Return true if scheduling insn (passed as the parameter) will trigger
|
||
+ finish of scheduling current block. */
|
||
+ bool (*insn_finishes_block_p) (rtx);
|
||
+
|
||
/* The boundaries of the set of insns to be scheduled. */
|
||
rtx prev_head, next_tail;
|
||
|
||
--- a/gcc/sched-rgn.c
|
||
+++ b/gcc/sched-rgn.c
|
||
@@ -2338,6 +2338,19 @@
|
||
0, 0, 0
|
||
};
|
||
|
||
+/* Return true if scheduling INSN will trigger finish of scheduling
|
||
+ current block. */
|
||
+static bool
|
||
+rgn_insn_finishes_block_p (rtx insn)
|
||
+{
|
||
+ if (INSN_BB (insn) == target_bb
|
||
+ && sched_target_n_insns + 1 == target_n_insns)
|
||
+ /* INSN is the last not-scheduled instruction in the current block. */
|
||
+ return true;
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
/* Used in schedule_insns to initialize current_sched_info for scheduling
|
||
regions (or single basic blocks). */
|
||
|
||
@@ -2350,6 +2363,7 @@
|
||
rgn_rank,
|
||
rgn_print_insn,
|
||
contributes_to_priority,
|
||
+ rgn_insn_finishes_block_p,
|
||
|
||
NULL, NULL,
|
||
NULL, NULL,
|
||
--- a/gcc/sdbout.c
|
||
+++ b/gcc/sdbout.c
|
||
@@ -337,6 +337,7 @@
|
||
debug_nothing_int, /* handle_pch */
|
||
debug_nothing_rtx, /* var_location */
|
||
debug_nothing_void, /* switch_text_section */
|
||
+ debug_nothing_tree_tree, /* set_name */
|
||
0 /* start_end_main_source_file */
|
||
};
|
||
|
||
--- a/gcc/sel-sched-ir.c
|
||
+++ b/gcc/sel-sched-ir.c
|
||
@@ -5431,6 +5431,7 @@
|
||
NULL, /* rgn_rank */
|
||
sel_print_insn, /* rgn_print_insn */
|
||
contributes_to_priority,
|
||
+ NULL, /* insn_finishes_block_p */
|
||
|
||
NULL, NULL,
|
||
NULL, NULL,
|
||
--- a/gcc/target-def.h
|
||
+++ b/gcc/target-def.h
|
||
@@ -84,7 +84,7 @@
|
||
#define TARGET_ASM_INTERNAL_LABEL default_internal_label
|
||
#endif
|
||
|
||
-#ifndef TARGET_ARM_TTYPE
|
||
+#ifndef TARGET_ASM_TTYPE
|
||
#define TARGET_ASM_TTYPE hook_bool_rtx_false
|
||
#endif
|
||
|
||
@@ -208,6 +208,10 @@
|
||
#define TARGET_EXTRA_LIVE_ON_ENTRY hook_void_bitmap
|
||
#endif
|
||
|
||
+#ifndef TARGET_WARN_FUNC_RESULT
|
||
+#define TARGET_WARN_FUNC_RESULT hook_bool_void_true
|
||
+#endif
|
||
+
|
||
#ifndef TARGET_ASM_FILE_START_APP_OFF
|
||
#define TARGET_ASM_FILE_START_APP_OFF false
|
||
#endif
|
||
@@ -383,6 +387,9 @@
|
||
#define TARGET_VECTOR_ALIGNMENT_REACHABLE \
|
||
default_builtin_vector_alignment_reachable
|
||
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM 0
|
||
+#define TARGET_VECTOR_MIN_ALIGNMENT \
|
||
+ default_vector_min_alignment
|
||
+#define TARGET_VECTOR_ALWAYS_MISALIGN hook_bool_const_tree_false
|
||
|
||
#define TARGET_VECTORIZE \
|
||
{ \
|
||
@@ -393,7 +400,9 @@
|
||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \
|
||
TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST, \
|
||
TARGET_VECTOR_ALIGNMENT_REACHABLE, \
|
||
- TARGET_VECTORIZE_BUILTIN_VEC_PERM \
|
||
+ TARGET_VECTORIZE_BUILTIN_VEC_PERM, \
|
||
+ TARGET_VECTOR_MIN_ALIGNMENT, \
|
||
+ TARGET_VECTOR_ALWAYS_MISALIGN, \
|
||
}
|
||
|
||
#define TARGET_DEFAULT_TARGET_FLAGS 0
|
||
@@ -504,6 +513,7 @@
|
||
#define TARGET_ALLOCATE_INITIAL_VALUE NULL
|
||
|
||
#define TARGET_UNSPEC_MAY_TRAP_P default_unspec_may_trap_p
|
||
+#define TARGET_COMMUTATIVE_OPERAND_PRECEDENCE NULL
|
||
|
||
#ifndef TARGET_SET_CURRENT_FUNCTION
|
||
#define TARGET_SET_CURRENT_FUNCTION hook_void_tree
|
||
@@ -532,6 +542,10 @@
|
||
#define TARGET_INVALID_CONVERSION hook_constcharptr_const_tree_const_tree_null
|
||
#define TARGET_INVALID_UNARY_OP hook_constcharptr_int_const_tree_null
|
||
#define TARGET_INVALID_BINARY_OP hook_constcharptr_int_const_tree_const_tree_null
|
||
+#define TARGET_INVALID_PARAMETER_TYPE hook_constcharptr_const_tree_null
|
||
+#define TARGET_INVALID_RETURN_TYPE hook_constcharptr_const_tree_null
|
||
+#define TARGET_PROMOTED_TYPE hook_tree_const_tree_null
|
||
+#define TARGET_CONVERT_TO_TYPE hook_tree_tree_tree_null
|
||
|
||
#define TARGET_FIXED_CONDITION_CODE_REGS hook_bool_uintp_uintp_false
|
||
|
||
@@ -590,6 +604,7 @@
|
||
#define TARGET_ARG_PARTIAL_BYTES hook_int_CUMULATIVE_ARGS_mode_tree_bool_0
|
||
|
||
#define TARGET_FUNCTION_VALUE default_function_value
|
||
+#define TARGET_LIBCALL_VALUE default_libcall_value
|
||
#define TARGET_INTERNAL_ARG_POINTER default_internal_arg_pointer
|
||
#define TARGET_UPDATE_STACK_BOUNDARY NULL
|
||
#define TARGET_GET_DRAP_RTX NULL
|
||
@@ -613,6 +628,7 @@
|
||
TARGET_ARG_PARTIAL_BYTES, \
|
||
TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN, \
|
||
TARGET_FUNCTION_VALUE, \
|
||
+ TARGET_LIBCALL_VALUE, \
|
||
TARGET_INTERNAL_ARG_POINTER, \
|
||
TARGET_UPDATE_STACK_BOUNDARY, \
|
||
TARGET_GET_DRAP_RTX, \
|
||
@@ -716,6 +732,11 @@
|
||
#define TARGET_CXX_ADJUST_CLASS_AT_DEFINITION hook_void_tree
|
||
#endif
|
||
|
||
+
|
||
+#ifndef TARGET_CXX_TTYPE_REF_ENCODE
|
||
+#define TARGET_CXX_TTYPE_REF_ENCODE NULL
|
||
+#endif
|
||
+
|
||
#define TARGET_CXX \
|
||
{ \
|
||
TARGET_CXX_GUARD_TYPE, \
|
||
@@ -730,7 +751,8 @@
|
||
TARGET_CXX_LIBRARY_RTTI_COMDAT, \
|
||
TARGET_CXX_USE_AEABI_ATEXIT, \
|
||
TARGET_CXX_USE_ATEXIT_FOR_CXA_ATEXIT, \
|
||
- TARGET_CXX_ADJUST_CLASS_AT_DEFINITION \
|
||
+ TARGET_CXX_ADJUST_CLASS_AT_DEFINITION, \
|
||
+ TARGET_CXX_TTYPE_REF_ENCODE \
|
||
}
|
||
|
||
/* EMUTLS specific */
|
||
@@ -886,6 +908,7 @@
|
||
TARGET_ADDRESS_COST, \
|
||
TARGET_ALLOCATE_INITIAL_VALUE, \
|
||
TARGET_UNSPEC_MAY_TRAP_P, \
|
||
+ TARGET_COMMUTATIVE_OPERAND_PRECEDENCE, \
|
||
TARGET_DWARF_REGISTER_SPAN, \
|
||
TARGET_INIT_DWARF_REG_SIZES_EXTRA, \
|
||
TARGET_FIXED_CONDITION_CODE_REGS, \
|
||
@@ -913,6 +936,10 @@
|
||
TARGET_INVALID_CONVERSION, \
|
||
TARGET_INVALID_UNARY_OP, \
|
||
TARGET_INVALID_BINARY_OP, \
|
||
+ TARGET_INVALID_PARAMETER_TYPE, \
|
||
+ TARGET_INVALID_RETURN_TYPE, \
|
||
+ TARGET_PROMOTED_TYPE, \
|
||
+ TARGET_CONVERT_TO_TYPE, \
|
||
TARGET_IRA_COVER_CLASSES, \
|
||
TARGET_SECONDARY_RELOAD, \
|
||
TARGET_EXPAND_TO_RTL_HOOK, \
|
||
@@ -923,6 +950,7 @@
|
||
TARGET_EMUTLS, \
|
||
TARGET_OPTION_HOOKS, \
|
||
TARGET_EXTRA_LIVE_ON_ENTRY, \
|
||
+ TARGET_WARN_FUNC_RESULT, \
|
||
TARGET_UNWIND_TABLES_DEFAULT, \
|
||
TARGET_HAVE_NAMED_SECTIONS, \
|
||
TARGET_HAVE_SWITCHABLE_BSS_SECTIONS, \
|
||
--- a/gcc/target.h
|
||
+++ b/gcc/target.h
|
||
@@ -473,7 +473,16 @@
|
||
|
||
/* Target builtin that implements vector permute. */
|
||
tree (* builtin_vec_perm) (tree, tree*);
|
||
-} vectorize;
|
||
+
|
||
+ /* Return the minimum alignment required to load or store a
|
||
+ vector of the given type, which may be less than the
|
||
+ natural alignment of the type. */
|
||
+ int (* vector_min_alignment) (const_tree);
|
||
+
|
||
+ /* Return true if "movmisalign" patterns should be used for all
|
||
+ loads/stores from data arrays. */
|
||
+ bool (* always_misalign) (const_tree);
|
||
+ } vectorize;
|
||
|
||
/* The initial value of target_flags. */
|
||
int default_target_flags;
|
||
@@ -694,6 +703,10 @@
|
||
FLAGS has the same meaning as in rtlanal.c: may_trap_p_1. */
|
||
int (* unspec_may_trap_p) (const_rtx x, unsigned flags);
|
||
|
||
+ /* Return a value indicating whether an operand of a commutative
|
||
+ operation is preferred as the first or second operand. */
|
||
+ int (* commutative_operand_precedence) (const_rtx, int);
|
||
+
|
||
/* Given a register, this hook should return a parallel of registers
|
||
to represent where to find the register pieces. Define this hook
|
||
if the register and its mode are represented in Dwarf in
|
||
@@ -870,6 +883,10 @@
|
||
rtx (*function_value) (const_tree ret_type, const_tree fn_decl_or_type,
|
||
bool outgoing);
|
||
|
||
+ /* Return the rtx for the result of a libcall of mode MODE,
|
||
+ calling the function FN_NAME. */
|
||
+ rtx (*libcall_value) (enum machine_mode, rtx);
|
||
+
|
||
/* Return an rtx for the argument pointer incoming to the
|
||
current function. */
|
||
rtx (*internal_arg_pointer) (void);
|
||
@@ -899,6 +916,24 @@
|
||
is not permitted on TYPE1 and TYPE2, NULL otherwise. */
|
||
const char *(*invalid_binary_op) (int op, const_tree type1, const_tree type2);
|
||
|
||
+ /* Return the diagnostic message string if TYPE is not valid as a
|
||
+ function parameter type, NULL otherwise. */
|
||
+ const char *(*invalid_parameter_type) (const_tree type);
|
||
+
|
||
+ /* Return the diagnostic message string if TYPE is not valid as a
|
||
+ function return type, NULL otherwise. */
|
||
+ const char *(*invalid_return_type) (const_tree type);
|
||
+
|
||
+ /* If values of TYPE are promoted to some other type when used in
|
||
+ expressions (analogous to the integer promotions), return that type,
|
||
+ or NULL_TREE otherwise. */
|
||
+ tree (*promoted_type) (const_tree type);
|
||
+
|
||
+ /* Convert EXPR to TYPE, if target-specific types with special conversion
|
||
+ rules are involved. Return the converted expression, or NULL to apply
|
||
+ the standard conversion rules. */
|
||
+ tree (*convert_to_type) (tree type, tree expr);
|
||
+
|
||
/* Return the array of IRA cover classes for the current target. */
|
||
const enum reg_class *(*ira_cover_classes) (void);
|
||
|
||
@@ -977,6 +1012,11 @@
|
||
class (eg, tweak visibility or perform any other required
|
||
target modifications). */
|
||
void (*adjust_class_at_definition) (tree type);
|
||
+ /* Encode a reference type info, used for catching pointer
|
||
+ references. The provided expression will be the address of the
|
||
+ type info object of the type to which a reference is being
|
||
+ caught. */
|
||
+ tree (* ttype_ref_encode) (tree);
|
||
} cxx;
|
||
|
||
/* Functions and data for emulated TLS support. */
|
||
@@ -1040,6 +1080,10 @@
|
||
bits in the bitmap passed in. */
|
||
void (*live_on_entry) (bitmap);
|
||
|
||
+ /* Return false if warnings about missing return statements or suspect
|
||
+ noreturn attributes should be suppressed for the current function. */
|
||
+ bool (*warn_func_result) (void);
|
||
+
|
||
/* True if unwinding tables should be generated by default. */
|
||
bool unwind_tables_default;
|
||
|
||
--- a/gcc/targhooks.c
|
||
+++ b/gcc/targhooks.c
|
||
@@ -441,6 +441,15 @@
|
||
return NULL;
|
||
}
|
||
|
||
+tree
|
||
+hook_cxx_ttype_ref_in_bit0 (tree exp)
|
||
+{
|
||
+ exp = convert (build_pointer_type (char_type_node), exp);
|
||
+ exp = pointer_int_sum (PLUS_EXPR, exp, integer_one_node);
|
||
+
|
||
+ return exp;
|
||
+}
|
||
+
|
||
/* Initialize the stack protection decls. */
|
||
|
||
/* Stack protection related decls living in libgcc. */
|
||
@@ -561,6 +570,12 @@
|
||
}
|
||
|
||
rtx
|
||
+default_libcall_value (enum machine_mode mode, rtx fun ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ return LIBCALL_VALUE (mode);
|
||
+}
|
||
+
|
||
+rtx
|
||
default_internal_arg_pointer (void)
|
||
{
|
||
/* If the reg that the virtual arg pointer will be translated into is
|
||
@@ -712,6 +727,12 @@
|
||
return true;
|
||
}
|
||
|
||
+int
|
||
+default_vector_min_alignment (const_tree type)
|
||
+{
|
||
+ return TYPE_ALIGN_UNIT (type);
|
||
+}
|
||
+
|
||
bool
|
||
default_hard_regno_scratch_ok (unsigned int regno ATTRIBUTE_UNUSED)
|
||
{
|
||
--- a/gcc/targhooks.h
|
||
+++ b/gcc/targhooks.h
|
||
@@ -48,6 +48,7 @@
|
||
|
||
extern tree default_cxx_guard_type (void);
|
||
extern tree default_cxx_get_cookie_size (tree);
|
||
+extern tree hook_cxx_ttype_ref_in_bit0 (tree);
|
||
|
||
extern bool hook_pass_by_reference_must_pass_in_stack
|
||
(CUMULATIVE_ARGS *, enum machine_mode mode, const_tree, bool);
|
||
@@ -71,6 +72,8 @@
|
||
|
||
extern bool default_builtin_vector_alignment_reachable (const_tree, bool);
|
||
|
||
+extern int default_vector_min_alignment (const_tree);
|
||
+
|
||
/* These are here, and not in hooks.[ch], because not all users of
|
||
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
|
||
|
||
@@ -87,6 +90,7 @@
|
||
(const_tree, const_tree, const_tree);
|
||
extern bool hook_bool_const_rtx_commutative_p (const_rtx, int);
|
||
extern rtx default_function_value (const_tree, const_tree, bool);
|
||
+extern rtx default_libcall_value (enum machine_mode, rtx);
|
||
extern rtx default_internal_arg_pointer (void);
|
||
#ifdef IRA_COVER_CLASSES
|
||
extern const enum reg_class *default_ira_cover_classes (void);
|
||
--- a/gcc/timevar.def
|
||
+++ b/gcc/timevar.def
|
||
@@ -134,6 +134,7 @@
|
||
DEFTIMEVAR (TV_PREDCOM , "predictive commoning")
|
||
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
|
||
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")
|
||
+DEFTIMEVAR (TV_TREE_LOOP_PROMOTE , "tree loop index promotion")
|
||
DEFTIMEVAR (TV_TREE_CH , "tree copy headers")
|
||
DEFTIMEVAR (TV_TREE_SSA_UNCPROP , "tree SSA uncprop")
|
||
DEFTIMEVAR (TV_TREE_SSA_TO_NORMAL , "tree SSA to normal")
|
||
@@ -141,6 +142,7 @@
|
||
DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies")
|
||
DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier")
|
||
DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier")
|
||
+DEFTIMEVAR (TV_TREE_RLS , "tree local static removal")
|
||
DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization conversion")
|
||
DEFTIMEVAR (TV_CGRAPH_VERIFY , "callgraph verifier")
|
||
DEFTIMEVAR (TV_DOM_FRONTIERS , "dominance frontiers")
|
||
--- a/gcc/toplev.h
|
||
+++ b/gcc/toplev.h
|
||
@@ -139,6 +139,7 @@
|
||
extern int flag_unroll_all_loops;
|
||
extern int flag_unswitch_loops;
|
||
extern int flag_cprop_registers;
|
||
+extern int flag_remove_local_statics;
|
||
extern int time_report;
|
||
extern int flag_ira_coalesce;
|
||
extern int flag_ira_move_spills;
|
||
--- a/gcc/tree.c
|
||
+++ b/gcc/tree.c
|
||
@@ -4062,6 +4062,7 @@
|
||
bool *no_add_attrs)
|
||
{
|
||
tree node = *pnode;
|
||
+ bool is_dllimport;
|
||
|
||
/* These attributes may apply to structure and union types being created,
|
||
but otherwise should pass to the declaration involved. */
|
||
@@ -4109,9 +4110,11 @@
|
||
return NULL_TREE;
|
||
}
|
||
|
||
+ is_dllimport = is_attribute_p ("dllimport", name);
|
||
+
|
||
/* Report error on dllimport ambiguities seen now before they cause
|
||
any damage. */
|
||
- else if (is_attribute_p ("dllimport", name))
|
||
+ if (is_dllimport)
|
||
{
|
||
/* Honor any target-specific overrides. */
|
||
if (!targetm.valid_dllimport_attribute_p (node))
|
||
@@ -4153,6 +4156,9 @@
|
||
if (*no_add_attrs == false)
|
||
DECL_DLLIMPORT_P (node) = 1;
|
||
}
|
||
+ else if (DECL_DECLARED_INLINE_P (node))
|
||
+ /* An exported function, even if inline, must be emitted. */
|
||
+ DECL_EXTERNAL (node) = 0;
|
||
|
||
/* Report error if symbol is not accessible at global scope. */
|
||
if (!TREE_PUBLIC (node)
|
||
--- a/gcc/tree-cfg.c
|
||
+++ b/gcc/tree-cfg.c
|
||
@@ -47,6 +47,7 @@
|
||
#include "value-prof.h"
|
||
#include "pointer-set.h"
|
||
#include "tree-inline.h"
|
||
+#include "target.h"
|
||
|
||
/* This file contains functions for building the Control Flow Graph (CFG)
|
||
for a function tree. */
|
||
@@ -7052,6 +7053,9 @@
|
||
edge e;
|
||
edge_iterator ei;
|
||
|
||
+ if (!targetm.warn_func_result ())
|
||
+ return 0;
|
||
+
|
||
/* If we have a path to EXIT, then we do return. */
|
||
if (TREE_THIS_VOLATILE (cfun->decl)
|
||
&& EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0)
|
||
--- a/gcc/tree.h
|
||
+++ b/gcc/tree.h
|
||
@@ -381,8 +381,10 @@
|
||
unsigned lang_flag_5 : 1;
|
||
unsigned lang_flag_6 : 1;
|
||
unsigned visited : 1;
|
||
+ unsigned packed_flag : 1;
|
||
+ unsigned user_align : 1;
|
||
|
||
- unsigned spare : 23;
|
||
+ unsigned spare : 21;
|
||
|
||
union tree_ann_d *ann;
|
||
};
|
||
@@ -2140,7 +2142,7 @@
|
||
|
||
/* 1 if the alignment for this type was requested by "aligned" attribute,
|
||
0 if it is the default for this type. */
|
||
-#define TYPE_USER_ALIGN(NODE) (TYPE_CHECK (NODE)->type.user_align)
|
||
+#define TYPE_USER_ALIGN(NODE) (TYPE_CHECK (NODE)->common.base.user_align)
|
||
|
||
/* The alignment for NODE, in bytes. */
|
||
#define TYPE_ALIGN_UNIT(NODE) (TYPE_ALIGN (NODE) / BITS_PER_UNIT)
|
||
@@ -2246,7 +2248,7 @@
|
||
|
||
/* Indicated that objects of this type should be laid out in as
|
||
compact a way as possible. */
|
||
-#define TYPE_PACKED(NODE) (TYPE_CHECK (NODE)->type.packed_flag)
|
||
+#define TYPE_PACKED(NODE) (TYPE_CHECK (NODE)->common.base.packed_flag)
|
||
|
||
/* Used by type_contains_placeholder_p to avoid recomputation.
|
||
Values are: 0 (unknown), 1 (false), 2 (true). Never access
|
||
@@ -2265,17 +2267,16 @@
|
||
tree attributes;
|
||
unsigned int uid;
|
||
|
||
- unsigned int precision : 9;
|
||
- ENUM_BITFIELD(machine_mode) mode : 7;
|
||
-
|
||
- unsigned string_flag : 1;
|
||
+ unsigned int precision : 10;
|
||
unsigned no_force_blk_flag : 1;
|
||
unsigned needs_constructing_flag : 1;
|
||
unsigned transparent_union_flag : 1;
|
||
- unsigned packed_flag : 1;
|
||
unsigned restrict_flag : 1;
|
||
unsigned contains_placeholder_bits : 2;
|
||
|
||
+ ENUM_BITFIELD(machine_mode) mode : 8;
|
||
+
|
||
+ unsigned string_flag : 1;
|
||
unsigned lang_flag_0 : 1;
|
||
unsigned lang_flag_1 : 1;
|
||
unsigned lang_flag_2 : 1;
|
||
@@ -2283,7 +2284,6 @@
|
||
unsigned lang_flag_4 : 1;
|
||
unsigned lang_flag_5 : 1;
|
||
unsigned lang_flag_6 : 1;
|
||
- unsigned user_align : 1;
|
||
|
||
unsigned int align;
|
||
alias_set_type alias_set;
|
||
@@ -2584,7 +2584,7 @@
|
||
#define DECL_ALIGN_UNIT(NODE) (DECL_ALIGN (NODE) / BITS_PER_UNIT)
|
||
/* Set if the alignment of this DECL has been set by the user, for
|
||
example with an 'aligned' attribute. */
|
||
-#define DECL_USER_ALIGN(NODE) (DECL_COMMON_CHECK (NODE)->decl_common.user_align)
|
||
+#define DECL_USER_ALIGN(NODE) (DECL_COMMON_CHECK (NODE)->common.base.user_align)
|
||
/* Holds the machine mode corresponding to the declaration of a variable or
|
||
field. Always equal to TYPE_MODE (TREE_TYPE (decl)) except for a
|
||
FIELD_DECL. */
|
||
@@ -2621,7 +2621,7 @@
|
||
example, for a FUNCTION_DECL, DECL_SAVED_TREE may be non-NULL and
|
||
DECL_EXTERNAL may be true simultaneously; that can be the case for
|
||
a C99 "extern inline" function. */
|
||
-#define DECL_EXTERNAL(NODE) (DECL_COMMON_CHECK (NODE)->decl_common.decl_flag_2)
|
||
+#define DECL_EXTERNAL(NODE) (DECL_COMMON_CHECK (NODE)->decl_common.decl_flag_1)
|
||
|
||
/* Nonzero in a ..._DECL means this variable is ref'd from a nested function.
|
||
For VAR_DECL nodes, PARM_DECL nodes, and FUNCTION_DECL nodes.
|
||
@@ -2696,7 +2696,6 @@
|
||
unsigned ignored_flag : 1;
|
||
unsigned abstract_flag : 1;
|
||
unsigned artificial_flag : 1;
|
||
- unsigned user_align : 1;
|
||
unsigned preserve_flag: 1;
|
||
unsigned debug_expr_is_from : 1;
|
||
|
||
@@ -2712,22 +2711,20 @@
|
||
/* In LABEL_DECL, this is DECL_ERROR_ISSUED.
|
||
In VAR_DECL and PARM_DECL, this is DECL_REGISTER. */
|
||
unsigned decl_flag_0 : 1;
|
||
- /* In FIELD_DECL, this is DECL_PACKED. */
|
||
- unsigned decl_flag_1 : 1;
|
||
/* In FIELD_DECL, this is DECL_BIT_FIELD
|
||
In VAR_DECL and FUNCTION_DECL, this is DECL_EXTERNAL.
|
||
- In TYPE_DECL, this is TYPE_DECL_SUPRESS_DEBUG. */
|
||
- unsigned decl_flag_2 : 1;
|
||
+ In TYPE_DECL, this is TYPE_DECL_SUPPRESS_DEBUG. */
|
||
+ unsigned decl_flag_1 : 1;
|
||
/* In FIELD_DECL, this is DECL_NONADDRESSABLE_P
|
||
- In VAR_DECL and PARM_DECL, this is DECL_HAS_VALUE_EXPR. */
|
||
- unsigned decl_flag_3 : 1;
|
||
+ In VAR_DECL and PARM_DECL, this is DECL_HAS_VALUE_EXPR_P. */
|
||
+ unsigned decl_flag_2 : 1;
|
||
/* Logically, these two would go in a theoretical base shared by var and
|
||
parm decl. */
|
||
unsigned gimple_reg_flag : 1;
|
||
/* In a DECL with pointer type, set if no TBAA should be done. */
|
||
unsigned no_tbaa_flag : 1;
|
||
/* Padding so that 'align' can be on a 32-bit boundary. */
|
||
- unsigned decl_common_unused : 2;
|
||
+ unsigned decl_common_unused : 4;
|
||
|
||
unsigned int align : 24;
|
||
/* DECL_OFFSET_ALIGN, used only for FIELD_DECLs. */
|
||
@@ -2751,7 +2748,7 @@
|
||
decl itself. This should only be used for debugging; once this field has
|
||
been set, the decl itself may not legitimately appear in the function. */
|
||
#define DECL_HAS_VALUE_EXPR_P(NODE) \
|
||
- (TREE_CHECK2 (NODE, VAR_DECL, PARM_DECL)->decl_common.decl_flag_3)
|
||
+ (TREE_CHECK2 (NODE, VAR_DECL, PARM_DECL)->decl_common.decl_flag_2)
|
||
#define DECL_VALUE_EXPR(NODE) \
|
||
(decl_value_expr_lookup (DECL_WRTL_CHECK (NODE)))
|
||
#define SET_DECL_VALUE_EXPR(NODE, VAL) \
|
||
@@ -2830,11 +2827,11 @@
|
||
#define DECL_FCONTEXT(NODE) (FIELD_DECL_CHECK (NODE)->field_decl.fcontext)
|
||
|
||
/* In a FIELD_DECL, indicates this field should be bit-packed. */
|
||
-#define DECL_PACKED(NODE) (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_1)
|
||
+#define DECL_PACKED(NODE) (FIELD_DECL_CHECK (NODE)->common.base.packed_flag)
|
||
|
||
/* Nonzero in a FIELD_DECL means it is a bit field, and must be accessed
|
||
specially. */
|
||
-#define DECL_BIT_FIELD(NODE) (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_2)
|
||
+#define DECL_BIT_FIELD(NODE) (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_1)
|
||
|
||
/* Used in a FIELD_DECL to indicate that we cannot form the address of
|
||
this component. This makes it possible for Type-Based Alias Analysis
|
||
@@ -2852,7 +2849,7 @@
|
||
accesses to s.i must not be given the alias set of the type of 'i'
|
||
(int) but instead directly that of the type of 's' (struct S). */
|
||
#define DECL_NONADDRESSABLE_P(NODE) \
|
||
- (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_3)
|
||
+ (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_2)
|
||
|
||
struct tree_field_decl GTY(())
|
||
{
|
||
@@ -3337,7 +3334,7 @@
|
||
into stabs. Instead it will generate cross reference ('x') of names.
|
||
This uses the same flag as DECL_EXTERNAL. */
|
||
#define TYPE_DECL_SUPPRESS_DEBUG(NODE) \
|
||
- (TYPE_DECL_CHECK (NODE)->decl_common.decl_flag_2)
|
||
+ (TYPE_DECL_CHECK (NODE)->decl_common.decl_flag_1)
|
||
|
||
/* Getter of the imported declaration associated to the
|
||
IMPORTED_DECL node. */
|
||
--- a/gcc/tree-pass.h
|
||
+++ b/gcc/tree-pass.h
|
||
@@ -323,6 +323,7 @@
|
||
extern struct gimple_opt_pass pass_empty_loop;
|
||
extern struct gimple_opt_pass pass_record_bounds;
|
||
extern struct gimple_opt_pass pass_graphite_transforms;
|
||
+extern struct gimple_opt_pass pass_promote_indices;
|
||
extern struct gimple_opt_pass pass_if_conversion;
|
||
extern struct gimple_opt_pass pass_loop_distribution;
|
||
extern struct gimple_opt_pass pass_vectorize;
|
||
@@ -388,6 +389,7 @@
|
||
extern struct gimple_opt_pass pass_rebuild_cgraph_edges;
|
||
extern struct gimple_opt_pass pass_build_cgraph_edges;
|
||
extern struct gimple_opt_pass pass_reset_cc_flags;
|
||
+extern struct gimple_opt_pass pass_remove_local_statics;
|
||
|
||
/* IPA Passes */
|
||
extern struct ipa_opt_pass pass_ipa_inline;
|
||
--- a/gcc/tree-sra.c
|
||
+++ b/gcc/tree-sra.c
|
||
@@ -274,6 +274,12 @@
|
||
!= TYPE_PRECISION (TREE_TYPE (t))))
|
||
goto fail;
|
||
|
||
+ /* Disable optimization of bitfields on BITS_BIG_ENDIAN
|
||
+ architectures. SRA doesn't properly handle padding bits
|
||
+ at the bottom, see issue6713. */
|
||
+ if (DECL_BIT_FIELD (t) && BITS_BIG_ENDIAN)
|
||
+ goto fail;
|
||
+
|
||
saw_one_field = true;
|
||
}
|
||
|
||
--- /dev/null
|
||
+++ b/gcc/tree-ssa-loop-promote.c
|
||
@@ -0,0 +1,1628 @@
|
||
+/* Promotion of shorter-than-word-size loop indices.
|
||
+ Copyright (C) 2009 Free Software Foundation, Inc.
|
||
+
|
||
+This file is part of GCC.
|
||
+
|
||
+GCC is free software; you can redistribute it and/or modify it
|
||
+under the terms of the GNU General Public License as published by the
|
||
+Free Software Foundation; either version 3, or (at your option) any
|
||
+later version.
|
||
+
|
||
+GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
+for more details.
|
||
+
|
||
+You should have received a copy of the GNU General Public License
|
||
+along with GCC; see the file COPYING3. If not see
|
||
+<http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* This pass finds loop indices that are declared as
|
||
+ shorter-than-word-size and replaces them with word-sized loop
|
||
+ indices. (It assumes that word-sized quantities are the most
|
||
+ efficient type on which to do arithmetic.) The loop optimization
|
||
+ machinery has a difficult time seeing through the casts required to
|
||
+ promote such indices to word-sized quantities for memory addressing
|
||
+ and/or preserving the semantics of the source language (such as C).
|
||
+ The transformation also helps eliminate unnecessary
|
||
+ {sign,zero}-extensions required for the same.
|
||
+
|
||
+ Although this is most naturally expressed as a loop optimization
|
||
+ pass, we choose to place this pass some ways before the loop
|
||
+ optimization passes proper, so that other scalar optimizations will
|
||
+ run on our "cleaned-up" code.  This decision has the drawback of
|
||
+ requiring us to build and destroy all the loop optimization
|
||
+ infrastructure.
|
||
+
|
||
+ The algorithm is relatively simple. For each single-exit loop, we
|
||
+ identify the loop index variable. If the loop index variable is
|
||
+ shorter than the word size, then we have a candidate for promotion.
|
||
+ We determine whether the scalar evolution of the loop index fits a
|
||
+ particular pattern (incremented by 1, compared against a
|
||
+ similarly-typed loop bound, and only modified by a single increment
|
||
+ within the loop), as well as examining the uses of the loop index to
|
||
+ ensure we are able to safely promote those uses (e.g. the loop index
|
||
+ must not be stored to memory or passed to function calls). If these
|
||
+ conditions are satisfied, we create an appropriate word-sized type
|
||
+ and replace all uses and defs of the loop index variable with the new
|
||
+ variable. */
|
||
+
|
||
+#include "config.h"
|
||
+#include "system.h"
|
||
+#include "coretypes.h"
|
||
+#include "tm.h"
|
||
+
|
||
+#include "toplev.h"
|
||
+#include "rtl.h"
|
||
+#include "tm_p.h"
|
||
+#include "hard-reg-set.h"
|
||
+#include "obstack.h"
|
||
+#include "basic-block.h"
|
||
+#include "pointer-set.h"
|
||
+#include "intl.h"
|
||
+
|
||
+#include "tree.h"
|
||
+#include "gimple.h"
|
||
+#include "hashtab.h"
|
||
+#include "diagnostic.h"
|
||
+#include "tree-flow.h"
|
||
+#include "tree-dump.h"
|
||
+#include "cfgloop.h"
|
||
+#include "flags.h"
|
||
+#include "timevar.h"
|
||
+#include "tree-pass.h"
|
||
+
|
||
+struct promote_info {
|
||
+ /* The loop being analyzed. */
|
||
+ struct loop *loop;
|
||
+
|
||
+ /* The GIMPLE_COND controlling exit from the loop. */
|
||
+ gimple exit_expr;
|
||
+
|
||
+ /* The loop index variable's SSA_NAME that is defined in a phi node in
|
||
+ LOOP->HEADER. Note that this SSA_NAME may be different than the
|
||
+ one appearing in EXIT_EXPR. */
|
||
+ tree loop_index_name;
|
||
+
|
||
+ /* The bound of the loop. */
|
||
+ tree loop_limit;
|
||
+
|
||
+ /* Whether we've warned about things with
|
||
+ warn_unsafe_loop_optimizations. */
|
||
+ bool warned;
|
||
+
|
||
+ /* LOOP_INDEX_NAME's underlying VAR_DECL. */
|
||
+ tree var_decl;
|
||
+
|
||
+ /* The types to which defs/uses of LOOP_INDEX_NAME are cast via
|
||
+ NOP_EXPRs. */
|
||
+ VEC(tree, heap) *cast_types;
|
||
+
|
||
+ /* The number of times we have seen a cast to the corresponding type
|
||
+ (as determined by types_compatible_p) in CAST_TYPES. */
|
||
+ VEC(int, heap) *cast_counts;
|
||
+
|
||
+ /* Whether LOOP_INDEX_NAME is suitable for promotion. */
|
||
+ bool can_be_promoted_p;
|
||
+
|
||
+ /* If CAN_BE_PROMOTED_P, the promoted type. */
|
||
+ tree promoted_type;
|
||
+
|
||
+ /* If CAN_BE_PROMOTED_P, the promoted VAR_DECL. */
|
||
+ tree promoted_var;
|
||
+};
|
||
+
|
||
+/* A set of `struct promote_info'. */
|
||
+
|
||
+static struct pointer_set_t *promotion_info;
|
||
+
|
||
+/* A set of all potentially promotable SSA_NAMEs, used for quick
|
||
+ decision-making during analysis. */
|
||
+
|
||
+static struct pointer_set_t *promotable_names;
|
||
+
|
||
+/* A map from SSA_NAMEs to the VAR_DECL to which they will be
|
||
+ promoted. */
|
||
+
|
||
+static struct pointer_map_t *variable_map;
|
||
+
|
||
+/* A set of the stmts that we have already rebuilt with promoted variables. */
|
||
+
|
||
+static struct pointer_set_t *promoted_stmts;
|
||
+
|
||
+
|
||
+/* Add CASTED to PI->CAST_TYPES if we haven't seen CASTED before. */
|
||
+
|
||
+static void
|
||
+add_casted_type (struct promote_info *pi, tree casted)
|
||
+{
|
||
+ int i;
|
||
+ tree type;
|
||
+
|
||
+ /* For this information to be useful later, CASTED must be wider than
|
||
+ the type of the variable. */
|
||
+ if (TYPE_PRECISION (casted) <= TYPE_PRECISION (TREE_TYPE (pi->var_decl)))
|
||
+ return;
|
||
+
|
||
+ for (i = 0; VEC_iterate (tree, pi->cast_types, i, type); i++)
|
||
+ if (types_compatible_p (casted, type))
|
||
+ {
|
||
+ int c = VEC_index (int, pi->cast_counts, i);
+ VEC_replace (int, pi->cast_counts, i, ++c);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ /* Haven't seen the type before. */
|
||
+ VEC_safe_push (tree, heap, pi->cast_types, casted);
|
||
+ VEC_safe_push (int, heap, pi->cast_counts, 1);
|
||
+}
|
||
+
|
||
+/* Return the most-casted-to type in PI->CAST_TYPES. Return an
|
||
+ appropriately signed variant of size_type_node if the variable wasn't
|
||
+ cast in some fashion. */
|
||
+
|
||
+static tree
|
||
+choose_profitable_promoted_type (struct promote_info *pi)
|
||
+{
|
||
+ int i;
|
||
+ int count;
|
||
+ tree type = NULL_TREE;
|
||
+ int maxuse = -1;
|
||
+
|
||
+ for (i = 0; VEC_iterate (int, pi->cast_counts, i, count); i++)
|
||
+ if (count > maxuse)
|
||
+ {
|
||
+ maxuse = count;
|
||
+ type = VEC_index (tree, pi->cast_types, i);
|
||
+ }
|
||
+
|
||
+ if (type == NULL_TREE)
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Warning, failed to find upcast type for ");
|
||
+ print_generic_expr (dump_file, pi->loop_index_name, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+ return (TYPE_UNSIGNED (TREE_TYPE (pi->var_decl))
|
||
+ ? size_type_node
|
||
+ : signed_type_for (size_type_node));
|
||
+ }
|
||
+ else
|
||
+ return signed_type_for (type);
|
||
+}
|
||
+
|
||
+/* Intuit the loop index for LOOP from PHI. There must be a path that
|
||
+ only goes through NOP_EXPRs or CONVERT_EXPRs from the result of PHI
|
||
+ to one of the operands of COND. If such a path cannot be found,
|
||
+ return NULL_TREE. If LIMIT is not NULL and a path can be found,
|
||
+ store the other operand of COND into LIMIT. */
|
||
+
|
||
+static tree
|
||
+find_promotion_candidate_from_phi (struct loop *loop, gimple cond,
|
||
+ gimple phi, tree *limit)
|
||
+{
|
||
+ tree op0, op1;
|
||
+ tree result, candidate;
|
||
+
|
||
+ result = candidate = PHI_RESULT (phi);
|
||
+ /* Must be an integer variable. */
|
||
+ if (TREE_CODE (TREE_TYPE (candidate)) != INTEGER_TYPE)
|
||
+ return NULL_TREE;
|
||
+
|
||
+ op0 = gimple_cond_lhs (cond);
|
||
+ op1 = gimple_cond_rhs (cond);
|
||
+
|
||
+ /* See if there's a path from CANDIDATE to an operand of COND. */
|
||
+ while (true)
|
||
+ {
|
||
+ use_operand_p use;
|
||
+ imm_use_iterator iui;
|
||
+ gimple use_stmt = NULL;
|
||
+
|
||
+ if (candidate == op0)
|
||
+ {
|
||
+ if (limit) *limit = op1;
|
||
+ break;
|
||
+ }
|
||
+ if (candidate == op1)
|
||
+ {
|
||
+ if (limit) *limit = op0;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ /* Find a single use in the loop header.  Give up if there are
|
||
+ multiple ones. */
|
||
+ FOR_EACH_IMM_USE_FAST (use, iui, candidate)
|
||
+ {
|
||
+ gimple stmt = USE_STMT (use);
|
||
+
|
||
+ if (gimple_bb (stmt) == loop->header)
|
||
+ {
|
||
+ if (use_stmt)
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Rejecting ");
|
||
+ print_generic_expr (dump_file, candidate, 0);
|
||
+ fprintf (dump_file, " because it has multiple uses in the loop header (bb #%d).\n",
|
||
+ loop->header->index);
|
||
+ fprintf (dump_file, "first use: ");
|
||
+ print_gimple_stmt (dump_file, use_stmt, 0, 0);
|
||
+ fprintf (dump_file, "\nsecond use: ");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n(possibly more, but unanalyzed)\n");
|
||
+ }
|
||
+ return NULL_TREE;
|
||
+ }
|
||
+ else
|
||
+ use_stmt = stmt;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* No uses in the loop header, bail. */
|
||
+ if (use_stmt == NULL)
|
||
+ return NULL_TREE;
|
||
+
|
||
+ if (gimple_code (use_stmt) != GIMPLE_ASSIGN
|
||
+ || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME
|
||
+ || (gimple_assign_rhs_code (use_stmt) != NOP_EXPR
|
||
+ && gimple_assign_rhs_code (use_stmt) != CONVERT_EXPR))
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Rejecting ");
|
||
+ print_generic_expr (dump_file, candidate, 0);
|
||
+ fprintf (dump_file, " because of use in ");
|
||
+ print_gimple_stmt (dump_file, use_stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+ return NULL_TREE;
|
||
+ }
|
||
+
|
||
+ candidate = gimple_assign_lhs (use_stmt);
|
||
+ }
|
||
+
|
||
+ /* CANDIDATE is now what we believe to be the loop index variable. There
|
||
+ are two possibilities:
|
||
+
|
||
+ - CANDIDATE is not the "true" loop index variable, but rather is a
|
||
+ promoted version of RESULT, done for purposes of satisfying a
|
||
+ language's semantics;
|
||
+
|
||
+ - CANDIDATE is the "true" loop index variable. */
|
||
+ if (!types_compatible_p (TREE_TYPE (result), TREE_TYPE (candidate)))
|
||
+ candidate = result;
|
||
+
|
||
+ /* The type of candidate must be "short" to consider promoting it. */
|
||
+ if (TREE_CODE (TREE_TYPE (candidate)) != INTEGER_TYPE
|
||
+ || TYPE_PRECISION (TREE_TYPE (candidate)) >= TYPE_PRECISION (size_type_node))
|
||
+ return NULL_TREE;
|
||
+
|
||
+ return candidate;
|
||
+}
|
||
+
|
||
+/* Find the loop index variable of LOOP. LOOP's exit is controlled by
+ the GIMPLE_COND COND. If we can't determine what the loop index
+ variable is, or COND does not appear to be analyzable, then return
+ NULL_TREE. */
|
||
+
|
||
+static tree
|
||
+find_promotion_candidate (struct loop *loop, gimple cond, tree *limit)
|
||
+{
|
||
+ tree candidate = NULL_TREE;
|
||
+ gimple_stmt_iterator gsi;
|
||
+
|
||
+ switch (gimple_cond_code (cond))
|
||
+ {
|
||
+ case GT_EXPR:
|
||
+ case GE_EXPR:
|
||
+ case NE_EXPR:
|
||
+ case LT_EXPR:
|
||
+ case LE_EXPR:
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ return NULL_TREE;
|
||
+ }
|
||
+
|
||
+ /* We'd like to examine COND and intuit the loop index variable from
+ there. Instead, we're going to start from the phi nodes in LOOP's
+ header and attempt to work our way forwards to one of the operands
+ of COND, since starting from COND might yield an upcast loop index.
+ If we find multiple phi nodes whose results reach COND, then give
+ up. */
|
||
+ for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
+ {
|
||
+ gimple phi = gsi_stmt (gsi);
|
||
+ tree t = find_promotion_candidate_from_phi (loop, cond, phi, limit);
|
||
+
|
||
+ if (t == NULL_TREE)
|
||
+ continue;
|
||
+ else if (candidate == NULL_TREE)
|
||
+ candidate = t;
|
||
+ else
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Can't find a candidate from ");
|
||
+ print_gimple_stmt (dump_file, cond, 0, 0);
|
||
+ fprintf (dump_file, "\n because too many phi node results reach the condition.\n");
|
||
+ }
|
||
+ return NULL_TREE;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return candidate;
|
||
+}
|
||
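For orientation, the kind of source loop this analysis is meant to recognize looks like the following. This is a hypothetical illustration; the function and variable names are not taken from the patch or its tests.

/* `i' is narrower than size_t, so on a 64-bit target every a[i]
   access needs a widening of `i'; the pass tries to prove that `i'
   can simply be kept in a wider type.  */
void
clear_prefix (int *a, short n)
{
  short i;

  for (i = 0; i < n; i++)   /* The GIMPLE_COND compares (a possibly
                               converted copy of) the phi result for
                               `i' against `n'; `n' is what ends up
                               in *LIMIT.  */
    a[i] = 0;
}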
+
|
||
+/* Return true if X is something that could be promoted. */
|
||
+
|
||
+static bool
|
||
+could_be_promoted (tree x)
|
||
+{
|
||
+ return (TREE_CODE (x) == INTEGER_CST
|
||
+ || (TREE_CODE (x) == SSA_NAME
|
||
+ && pointer_set_contains (promotable_names, x)));
|
||
+}
|
||
+
|
||
+/* Examine the RHS of STMT's suitability with respect to being able to
|
||
+ promote VAR. */
|
||
+
|
||
+static bool
|
||
+check_rhs_for_promotability (struct promote_info *pi, tree var, gimple stmt,
|
||
+ bool is_assign)
|
||
+{
|
||
+ enum tree_code subcode = gimple_assign_rhs_code (stmt);
|
||
+
|
||
+ bool ok = true;
|
||
+
|
||
+ switch (subcode)
|
||
+ {
|
||
+ case PLUS_EXPR:
|
||
+ case MINUS_EXPR:
|
||
+ case MULT_EXPR:
|
||
+ case EQ_EXPR:
|
||
+ case NE_EXPR:
|
||
+ case LT_EXPR:
|
||
+ case LE_EXPR:
|
||
+ case GT_EXPR:
|
||
+ case GE_EXPR:
|
||
+ {
|
||
+ tree op0 = gimple_assign_rhs1 (stmt);
|
||
+ tree op1 = gimple_assign_rhs2 (stmt);
|
||
+
|
||
+ ok = ((op0 == var && could_be_promoted (op1))
|
||
+ || (op1 == var && could_be_promoted (op0)));
|
||
+ break;
|
||
+ }
|
||
+ case COND_EXPR:
|
||
+ if (gimple_expr_type (stmt) == NULL
|
||
+ || gimple_expr_type (stmt) == void_type_node)
|
||
+ ok = true;
|
||
+ else
|
||
+ /* This is conservative; it's possible that these sorts of nodes
|
||
+ could be promoted, but we'd have to be very careful about
|
||
+ checking in which parts of the COND_EXPR the promotable
|
||
+ variable(s) are. */
|
||
+ ok = false;
|
||
+ break;
|
||
+ case SSA_NAME:
|
||
+ {
|
||
+ tree expr = gimple_assign_rhs1 (stmt);
|
||
+ ok = (expr == var || could_be_promoted (expr));
|
||
+ }
|
||
+ break;
|
||
+ case INTEGER_CST:
|
||
+ break;
|
||
+ case NOP_EXPR:
|
||
+ case CONVERT_EXPR:
|
||
+ if (!is_assign)
|
||
+ {
|
||
+ add_casted_type (pi, gimple_expr_type (stmt));
|
||
+ break;
|
||
+ }
|
||
+ /* Fallthrough. */
|
||
+ default:
|
||
+ ok = false;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ return ok;
|
||
+}
|
||
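A few concrete cases, written against the switch above. These are illustrative GIMPLE-like statements with made-up SSA names, not excerpts from a real dump.

/* Assuming the i_* names belong to the candidate VAR and j_1 is
   another name already in `promotable_names':

     i_2 = i_1 + 1;      PLUS_EXPR of VAR and a constant: accepted
     i_3 = j_1;          SSA_NAME copy from a promotable name: accepted
     k_5 = i_2 * m_7;    the other operand is not promotable: rejected
     t_9 = (int) i_2;    conversion in a use (not VAR's own def): the
                         target type is recorded via add_casted_type  */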
+
|
||
+/* Analyze the loop index VAR for promotability. The rules for
+ promotability are:
+
+ For uses:
+
+ - The underlying variable may be used in NOP_EXPRs.
+
+ - The underlying variable may be used in simple arithmetic
+ expressions so long as the other parts are potentially promotable
+ variables or constants (so we don't go willy-nilly on promoting
+ things).
+
+ - The underlying variable may not be stored to memory.
+
+ - All uses must occur inside the loop.
+
+ For defs:
+
+ - The underlying variable may not be loaded from memory; and
+
+ - The underlying variable may only be formed from expressions
+ involving potentially promotable variables or constants.
+
+ Note that defs may occur outside of the loop; we do this to handle
+ initial conditions before entering the loop. */
|
||
+
|
||
+static void
|
||
+analyze_loop_index_uses (tree var, struct promote_info *pi)
|
||
+{
|
||
+ imm_use_iterator iui;
|
||
+ use_operand_p use;
|
||
+ gimple bad_stmt = NULL;
|
||
+ const char *reason = NULL;
|
||
+
|
||
+ FOR_EACH_IMM_USE_FAST (use, iui, var)
|
||
+ {
|
||
+ basic_block bb;
|
||
+ gimple use_stmt = USE_STMT (use);
|
||
+
|
||
+ /* Uses must exist only within the loop. */
|
||
+ bb = gimple_bb (use_stmt);
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Checking ");
|
||
+ print_gimple_stmt (dump_file, use_stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+
|
||
+ if (!flow_bb_inside_loop_p (pi->loop, bb))
|
||
+ {
|
||
+ bad_stmt = use_stmt;
|
||
+ reason = " is involved in stmt outside loop ";
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ /* We cannot store the index to memory. */
|
||
+ if (gimple_references_memory_p (use_stmt))
|
||
+ {
|
||
+ bad_stmt = use_stmt;
|
||
+ reason = " is stored to memory in ";
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (gimple_code (use_stmt) == GIMPLE_CALL)
|
||
+ {
|
||
+ /* We cannot pass the variable to a function. */
|
||
+ bad_stmt = use_stmt;
|
||
+ reason = " is passed to function in ";
|
||
+ break;
|
||
+ }
|
||
+ else if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
|
||
+ {
|
||
+ tree lhs = gimple_assign_lhs (use_stmt);
|
||
+
|
||
+ if (!check_rhs_for_promotability (pi, var, use_stmt,
|
||
+ /*is_assign=*/false))
|
||
+ {
|
||
+ bad_stmt = use_stmt;
|
||
+ reason = " is involved in non-promotable expression ";
|
||
+ break;
|
||
+ }
|
||
+ else if ((TREE_CODE_CLASS (gimple_assign_rhs_code (use_stmt)) == tcc_binary
|
||
+ || gimple_assign_rhs_code (use_stmt) == SSA_NAME)
|
||
+ && !could_be_promoted (lhs))
|
||
+ {
|
||
+ bad_stmt = use_stmt;
|
||
+ reason = " is being assigned to non-promotable variable ";
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ else if (gimple_code (use_stmt) != GIMPLE_COND
|
||
+ && gimple_code (use_stmt) != GIMPLE_PHI)
|
||
+ {
|
||
+ /* Use of the variable in some statement we don't know how to
|
||
+ analyze. */
|
||
+ bad_stmt = use_stmt;
|
||
+ reason = " is used in unanalyzable expression in ";
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (bad_stmt && reason)
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Loop index ");
|
||
+ print_generic_expr (dump_file, var, 0);
|
||
+ fprintf (dump_file, "%s", reason);
|
||
+ print_gimple_stmt (dump_file, bad_stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+ pi->can_be_promoted_p = false;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Check that the uses and def of VAR, defined in STMT, conform to the
|
||
+ rules given above. */
|
||
+
|
||
+static bool
|
||
+analyze_loop_index (tree var, gimple stmt, void *data)
|
||
+{
|
||
+ struct promote_info *pi = (struct promote_info *) data;
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Analyzing loop index ");
|
||
+ print_generic_expr (dump_file, var, 0);
|
||
+ fprintf (dump_file, " defined in ");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+
|
||
+ /* Check the definition. */
|
||
+ switch (gimple_code (stmt))
|
||
+ {
|
||
+ case GIMPLE_PHI:
|
||
+ /* Phi nodes are OK. */
|
||
+ break;
|
||
+
|
||
+ case GIMPLE_ASSIGN:
|
||
+ if (!check_rhs_for_promotability (pi, var, stmt,
|
||
+ /*is_assign=*/true))
|
||
+ break;
|
||
+ /* Fallthrough. */
|
||
+
|
||
+ default:
|
||
+ /* Something we can't handle or the variable is being loaded from
|
||
+ memory. */
|
||
+ pi->can_be_promoted_p = false;
|
||
+ goto done;
|
||
+ }
|
||
+
|
||
+ if (gimple_code (stmt) == GIMPLE_PHI)
|
||
+ {
|
||
+ unsigned int i;
|
||
+
|
||
+ for (i = 0; i < gimple_phi_num_args (stmt); i++)
|
||
+ {
|
||
+ tree arg = PHI_ARG_DEF (stmt, i);
|
||
+
|
||
+ if (TREE_CODE (arg) == SSA_NAME)
|
||
+ pointer_set_insert (promotable_names, arg);
|
||
+ }
|
||
+
|
||
+ analyze_loop_index_uses (PHI_RESULT (stmt), pi);
|
||
+ }
|
||
+ else
|
||
+ analyze_loop_index_uses (var, pi);
|
||
+
|
||
+ /* Only worth continuing if we think the loop index can be
|
||
+ promoted. */
|
||
+ done:
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Done analyzing ");
|
||
+ print_generic_expr (dump_file, var, 0);
|
||
+ fprintf (dump_file, " defined in ");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, 0);
|
||
+ fprintf (dump_file, "...%s to analyze\n\n",
|
||
+ pi->can_be_promoted_p ? "continuing" : "not continuing");
|
||
+ }
|
||
+ return !pi->can_be_promoted_p;
|
||
+}
|
||
+
|
||
+/* Determine whether T is an INTEGER_CST or a single-use SSA_NAME
+ defined as the result of a NOP_EXPR or CONVERT_EXPR. Return T itself
+ for an INTEGER_CST, the operand of the NOP_EXPR or CONVERT_EXPR for
+ such an SSA_NAME, or NULL_TREE otherwise. */
|
||
+
|
||
+static tree
|
||
+upcast_operand_p (tree t)
|
||
+{
|
||
+ gimple def;
|
||
+
|
||
+ if (TREE_CODE (t) == INTEGER_CST)
|
||
+ return t;
|
||
+
|
||
+ if (TREE_CODE (t) != SSA_NAME
|
||
+ || !has_single_use (t))
|
||
+ return NULL_TREE;
|
||
+
|
||
+ def = SSA_NAME_DEF_STMT (t);
|
||
+ if (gimple_code (def) != GIMPLE_ASSIGN)
|
||
+ return NULL_TREE;
|
||
+
|
||
+ if (gimple_assign_rhs_code (def) != CONVERT_EXPR
|
||
+ && gimple_assign_rhs_code (def) != NOP_EXPR)
|
||
+ return NULL_TREE;
|
||
+
|
||
+ return gimple_assign_rhs1 (def);
|
||
+}
|
||
+
|
||
+/* Check for the idiom:
|
||
+
|
||
+ short x, y;
|
||
+ unsigned short x.2, y.2, tmp;
|
||
+ ...
|
||
+ x.2 = (unsigned short) x;
|
||
+ y.2 = (unsigned short) y;
|
||
+ tmp = x.2 + y.2;
|
||
+ x = (short) tmp;
|
||
+
|
||
+ which is generated by convert for avoiding signed arithmetic
|
||
+ overflow. RHS is TMP in the above statement. If RHS is
|
||
+ defined via such an idiom, store x and y into *OP0 and *OP1,
|
||
+ respectively. We permit y.2 to be a constant if necessary. */
|
||
+
|
||
+static bool
|
||
+signed_arithmetic_overflow_idiom_p (tree rhs, tree *op0, tree *op1)
|
||
+{
|
||
+ gimple op_stmt = SSA_NAME_DEF_STMT (rhs);
|
||
+ tree x2, y2;
|
||
+ bool yes = false;
|
||
+ enum tree_code code;
|
||
+
|
||
+ if (!has_single_use (rhs)
|
||
+ || gimple_code (op_stmt) != GIMPLE_ASSIGN)
|
||
+ goto done;
|
||
+
|
||
+ /* This could probably profitably be expanded to consider
|
||
+ MINUS_EXPR, MULT_EXPR, etc. */
|
||
+ code = gimple_assign_rhs_code (op_stmt);
|
||
+ if (code != PLUS_EXPR)
|
||
+ goto done;
|
||
+ x2 = gimple_assign_rhs1 (op_stmt);
|
||
+ y2 = gimple_assign_rhs2 (op_stmt);
|
||
+
|
||
+ x2 = upcast_operand_p (x2);
|
||
+ if (x2 == NULL_TREE)
|
||
+ goto done;
|
||
+ y2 = upcast_operand_p (y2);
|
||
+ if (y2 == NULL_TREE)
|
||
+ goto done;
|
||
+
|
||
+ *op0 = x2;
|
||
+ *op1 = y2;
|
||
+ yes = true;
|
||
+
|
||
+ done:
|
||
+ return yes;
|
||
+}
|
||
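For reference, one plausible C fragment that gimplifies into the idiom matched above; this is a hypothetical example, assuming the front end's shortening/convert machinery narrows the arithmetic as described in the comment.

/* With 16-bit `short', the addition below is typically performed in
   `unsigned short' so that it cannot overflow in a signed type,
   producing the x.2/y.2/tmp pattern quoted in the comment above.  */
short x, y;

void
bump (void)
{
  x = x + y;
}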
+
|
||
+/* Simple wrapper around flow_bb_inside_loop_p that handles NULL
|
||
+ statements and initial definitions of variables. */
|
||
+
|
||
+static bool
|
||
+stmt_in_loop_p (gimple t, struct loop *loop)
|
||
+{
|
||
+ basic_block bb;
|
||
+
|
||
+ if (t == NULL)
|
||
+ return false;
|
||
+
|
||
+ bb = gimple_bb (t);
|
||
+ if (bb == NULL)
|
||
+ return false;
|
||
+
|
||
+ return flow_bb_inside_loop_p (loop, bb);
|
||
+}
|
||
+
|
||
+/* The loop index should have a specific usage pattern:
|
||
+
|
||
+ - It should be defined in a phi node with two incoming values:
|
||
+
|
||
+ LI_phi = PHI (LI_out, LI_in)
|
||
+
|
||
+ - One incoming value, LI_out, should be from outside the loop.
|
||
+
|
||
+ - The other incoming value, LI_in, should be defined thusly:
|
||
+
|
||
+ LI_in = LI_phi + increment
|
||
+
|
||
+ - increment should be 1. We permit other increments with
|
||
+ -funsafe-loop-optimizations.
|
||
+
|
||
+ - Finally, in the comparison to exit the loop, the loop index must be
|
||
+ compared against a variable that has a type at least as precise as
|
||
+ the loop index's type. For instance, something like:
|
||
+
|
||
+ char limit;
|
||
+ short i;
|
||
+
|
||
+ for (i = 0; i < limit; i++) ...
|
||
+
|
||
+ would not be permitted. */
|
||
+
|
||
+static bool
|
||
+analyze_loop_index_definition_pattern (struct promote_info *pi)
|
||
+{
|
||
+ gimple phi = SSA_NAME_DEF_STMT (pi->loop_index_name);
|
||
+ bool ok = false, warn = false;
|
||
+ tree in0, in1;
|
||
+ bool inside0, inside1;
|
||
+ gimple def0, def1;
|
||
+ tree op0, op1, increment = NULL_TREE;
|
||
+
|
||
+ if (gimple_code (phi) != GIMPLE_PHI
|
||
+ || gimple_phi_num_args (phi) != 2)
|
||
+ goto done;
|
||
+
|
||
+ in0 = PHI_ARG_DEF (phi, 0);
|
||
+ in1 = PHI_ARG_DEF (phi, 1);
|
||
+
|
||
+ /* Figure out which value comes from outside the loop. */
|
||
+ def0 = TREE_CODE (in0) == SSA_NAME ? SSA_NAME_DEF_STMT (in0) : NULL;
|
||
+ def1 = TREE_CODE (in1) == SSA_NAME ? SSA_NAME_DEF_STMT (in1) : NULL;
|
||
+
|
||
+ inside0 = stmt_in_loop_p (def0, pi->loop);
|
||
+ inside1 = stmt_in_loop_p (def1, pi->loop);
|
||
+
|
||
+ if (inside0 && inside1)
|
||
+ goto done;
|
||
+ else if (inside0)
|
||
+ {
|
||
+ tree t = in0;
|
||
+ gimple g;
|
||
+ in0 = in1;
|
||
+ in1 = t;
|
||
+ g = def0;
|
||
+ def0 = def1;
|
||
+ def1 = g;
|
||
+ }
|
||
+ else if (!inside1)
|
||
+ goto done;
|
||
+
|
||
+ /* IN0 comes from outside the loop, IN1 from inside. Analyze IN1. */
|
||
+ if (gimple_code (def1) != GIMPLE_ASSIGN)
|
||
+ goto done;
|
||
+
|
||
+ switch (gimple_assign_rhs_code (def1))
|
||
+ {
|
||
+ case CONVERT_EXPR:
|
||
+ case NOP_EXPR:
|
||
+ if (!signed_arithmetic_overflow_idiom_p (gimple_assign_rhs1 (def1),
|
||
+ &op0, &op1))
|
||
+ goto done;
|
||
+ goto plus;
|
||
+ case PLUS_EXPR:
|
||
+ op0 = gimple_assign_rhs1 (def1);
|
||
+ op1 = gimple_assign_rhs2 (def1);
|
||
+ plus:
|
||
+ {
|
||
+ bool op0_li = op0 == PHI_RESULT (phi);
|
||
+ bool op1_li = op1 == PHI_RESULT (phi);
|
||
+ if (op0_li && op1_li)
|
||
+ /* This is weird, and definitely is not a case we can support
|
||
+ for promotion. */
|
||
+ goto done;
|
||
+ else if (op0_li)
|
||
+ increment = op1;
|
||
+ else if (op1_li)
|
||
+ increment = op0;
|
||
+ else
|
||
+ goto done;
|
||
+ break;
|
||
+ }
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+
|
||
+
|
||
+ /* Check that the exit condition for the loop is OK. */
|
||
+ {
|
||
+ enum tree_code code = gimple_cond_code (pi->exit_expr);
|
||
+
|
||
+ op0 = gimple_cond_lhs (pi->exit_expr);
|
||
+ op1 = gimple_cond_rhs (pi->exit_expr);
|
||
+
|
||
+ if (op0 == pi->loop_limit)
|
||
+ {
|
||
+ tree t = op0;
|
||
+ op0 = op1;
|
||
+ op1 = t;
|
||
+ code = swap_tree_comparison (code);
|
||
+ }
|
||
+
|
||
+ if (code != LT_EXPR && code != LE_EXPR)
|
||
+ goto done;
|
||
+
|
||
+ if (!types_compatible_p (TREE_TYPE (pi->loop_index_name),
|
||
+ TREE_TYPE (pi->loop_limit)))
|
||
+ {
|
||
+ switch (TREE_CODE (pi->loop_limit))
|
||
+ {
|
||
+ case INTEGER_CST:
|
||
+ if (!int_fits_type_p (pi->loop_limit,
|
||
+ TREE_TYPE (pi->loop_index_name)))
|
||
+ goto done;
|
||
+ break;
|
||
+ case SSA_NAME:
|
||
+ {
|
||
+ tree v = pi->loop_limit;
|
||
+ gimple def = SSA_NAME_DEF_STMT (v);
|
||
+
|
||
+ /* Backtrack through CONVERT_EXPRs and/or NOP_EXPRs to
|
||
+ determine if the variables "started out" as the same
|
||
+ type. */
|
||
+ while (gimple_code (def) == GIMPLE_ASSIGN)
|
||
+ {
|
||
+ enum tree_code rhs_code = gimple_assign_rhs_code (def);
|
||
+
|
||
+ if (rhs_code != NOP_EXPR && rhs_code != CONVERT_EXPR)
|
||
+ break;
|
||
+
|
||
+ v = gimple_assign_rhs1 (def);
|
||
+ def = SSA_NAME_DEF_STMT (v);
|
||
+ }
|
||
+ /* Permit comparisons between non-compatible types with
|
||
+ flag_unsafe_loop_optimizations, since we can assume the
|
||
+ loop index does not overflow. */
|
||
+ if (types_compatible_p (TREE_TYPE (pi->loop_index_name),
|
||
+ TREE_TYPE (v))
|
||
+ || flag_unsafe_loop_optimizations)
|
||
+ break;
|
||
+ /* Fallthrough. */
|
||
+ default:
|
||
+ goto done;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (increment == NULL_TREE)
|
||
+ goto done;
|
||
+ if (TREE_CODE (increment) != INTEGER_CST
|
||
+ || compare_tree_int (increment, 1) != 0)
|
||
+ {
|
||
+ warn = true;
|
||
+ if (!flag_unsafe_loop_optimizations)
|
||
+ goto done;
|
||
+ }
|
||
+
|
||
+ ok = true;
|
||
+ done:
|
||
+ if (warn && !pi->warned)
|
||
+ {
|
||
+ pi->warned = true;
|
||
+ /* We can promote unsigned indices only if -funsafe-loop-optimizations
|
||
+ is in effect, since the user might be depending on the modulo
|
||
+ wraparound behavior of unsigned types. */
|
||
+ if (warn_unsafe_loop_optimizations)
|
||
+ {
|
||
+ const char *wording;
|
||
+
|
||
+ wording = (flag_unsafe_loop_optimizations
|
||
+ ? N_("assuming that the loop counter does not overflow")
|
||
+ : N_("cannot optimize loop, the loop counter may overflow"));
|
||
+ warning (OPT_Wunsafe_loop_optimizations, "%s", gettext (wording));
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return ok;
|
||
+}
|
||
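The increment check above amounts to the following source-level distinction; these are hypothetical examples, not taken from the patch's testsuite.

/* Loops against the pattern checks above:

     short i;

     for (i = 0; i < n; i++)      increment of 1: eligible
       ...

     for (i = 0; i < n; i += 4)   non-unit increment: only considered
       ...                        with -funsafe-loop-optimizations,
                                  since `i' could step over `n' and
                                  wrap around.  */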
+
|
||
+/* Analyze the loop associated with PI_ to see if its loop index can be
|
||
+ promoted. */
|
||
+
|
||
+static bool
|
||
+analyze_loop (const void *pi_, void *data)
|
||
+{
|
||
+ struct promote_info *pi = CONST_CAST (struct promote_info *,
|
||
+ (const struct promote_info *) pi_);
|
||
+ bool *changed = (bool *) data;
|
||
+
|
||
+ /* We previously determined we can't promote this; go ahead and
|
||
+ continue iterating. */
|
||
+ if (pi->loop_index_name == NULL_TREE)
|
||
+ return true;
|
||
+
|
||
+ /* Assume we can always promote the loop index, even if it doesn't
|
||
+ exist. */
|
||
+ pi->can_be_promoted_p = true;
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Analyzing ");
|
||
+ print_generic_expr (dump_file, pi->loop_index_name, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+
|
||
+ if (pi->loop_index_name
|
||
+ && analyze_loop_index_definition_pattern (pi))
|
||
+ {
|
||
+ /* Clear any previously gathered information. */
|
||
+ VEC_truncate (tree, pi->cast_types, 0);
|
||
+ VEC_truncate (int, pi->cast_counts, 0);
|
||
+
|
||
+ walk_use_def_chains (pi->loop_index_name, analyze_loop_index, pi, false);
|
||
+ }
|
||
+ else
|
||
+ pi->can_be_promoted_p = false;
|
||
+
|
||
+ /* If we determined the loop index is used in strange ways, clear it
|
||
+ so we don't examine it again. */
|
||
+ if (!pi->can_be_promoted_p)
|
||
+ pi->loop_index_name = NULL_TREE;
|
||
+
|
||
+ /* Let our caller know whether to re-do the analysis. */
|
||
+ *changed = *changed || !pi->can_be_promoted_p;
|
||
+ /* Continue if PI is promotable. */
|
||
+ return pi->can_be_promoted_p;
|
||
+}
|
||
+
|
||
+/* Add PI_->LOOP_INDEX_NAME to the set of variables, DATA, that we are
|
||
+ considering for promotion. */
|
||
+
|
||
+static bool
|
||
+add_variable (const void *pi_, void *data)
|
||
+{
|
||
+ const struct promote_info *pi = (const struct promote_info *) pi_;
|
||
+ struct pointer_set_t *pset = (struct pointer_set_t *) data;
|
||
+ int presentp;
|
||
+
|
||
+ if (pi->loop_index_name != NULL_TREE)
|
||
+ {
|
||
+ presentp = pointer_set_insert (pset, pi->loop_index_name);
|
||
+ gcc_assert (!presentp);
|
||
+ }
|
||
+
|
||
+ /* Continue traversal. */
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* For each promotable variable:
|
||
+
|
||
+ - create a new, promoted VAR_DECL;
|
||
+
|
||
+ - walk through all the uses and defs and create new statements using
|
||
+ the promoted variables. We don't create new phi nodes; post-pass
|
||
+ SSA update will handle those for us. */
|
||
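A hypothetical before/after sketch of this rewrite; the names and the promoted type are illustrative, not taken from a real dump.

/* before:
     short i;
     for (i = 0; i < n; i++)
       a[i] = 0;

   after (conceptually):
     long i.promoted;   -- type chosen by choose_profitable_promoted_type
     for (i.promoted = 0; i.promoted < (long) n; i.promoted++)
       a[i.promoted] = 0;

   The SSA names of `i' are mapped to `i.promoted' through variable_map,
   and the TODO_update_ssa returned by the pass rebuilds the phi nodes.  */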
+
|
||
+/* Make dump files readable. */
|
||
+#define PROMOTED_VAR_SUFFIX ".promoted"
|
||
+
|
||
+/* Create a variable NAME with TYPE and do the necessary work to inform
|
||
+ the SSA machinery about it. */
|
||
+
|
||
+static tree
|
||
+create_pli_var (tree type, char *name)
|
||
+{
|
||
+ tree var = create_tmp_var (type, name);
|
||
+ create_var_ann (var);
|
||
+ mark_sym_for_renaming (var);
|
||
+ add_referenced_var (var);
|
||
+ return var;
|
||
+}
|
||
+
|
||
+/* Associate the SSA_NAME VAR with the promoted variable DATA. */
|
||
+
|
||
+static bool
|
||
+associate_name_with_var (tree var, gimple def_stmt, void *data)
|
||
+{
|
||
+ tree promoted_var = (tree) data;
|
||
+ void **p;
|
||
+
|
||
+ gcc_assert (promoted_var != NULL_TREE);
|
||
+
|
||
+ if (gimple_code (def_stmt) == GIMPLE_PHI)
|
||
+ var = PHI_RESULT (def_stmt);
|
||
+
|
||
+ p = pointer_map_insert (variable_map, var);
|
||
+
|
||
+ if (!*p)
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Associating ");
|
||
+ print_generic_expr (dump_file, var, 0);
|
||
+ fprintf (dump_file, " with ");
|
||
+ print_generic_expr (dump_file, promoted_var, 0);
|
||
+ fprintf (dump_file, "\n\n");
|
||
+ }
|
||
+ *(tree *)p = promoted_var;
|
||
+ }
|
||
+
|
||
+ /* Continue traversal. */
|
||
+ return false;
|
||
+}
|
||
+
|
||
+/* Create a promoted variable for the variable from PI_. */
|
||
+
|
||
+static bool
|
||
+create_promoted_variable (const void *pi_, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ struct promote_info *pi = CONST_CAST (struct promote_info *,
|
||
+ (const struct promote_info *) pi_);
|
||
+
|
||
+ if (pi->can_be_promoted_p)
|
||
+ {
|
||
+ tree type = choose_profitable_promoted_type (pi);
|
||
+ tree orig_name = DECL_NAME (pi->var_decl);
|
||
+ size_t id_len = IDENTIFIER_LENGTH (orig_name);
|
||
+ size_t name_len = id_len + strlen (PROMOTED_VAR_SUFFIX) + 1;
|
||
+ char *name;
|
||
+
|
||
+ name = (char *) alloca (name_len);
|
||
+ strcpy (name, IDENTIFIER_POINTER (orig_name));
|
||
+ strcpy (name + id_len, PROMOTED_VAR_SUFFIX);
|
||
+
|
||
+ pi->promoted_type = type;
|
||
+ pi->promoted_var = create_pli_var (type, name);
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Created new variable ");
|
||
+ print_generic_expr (dump_file, pi->promoted_var, 0);
|
||
+ fprintf (dump_file, " to stand in for ");
|
||
+ print_generic_expr (dump_file, pi->loop_index_name, 0);
|
||
+ fprintf (dump_file, "\n\n");
|
||
+ }
|
||
+
|
||
+ walk_use_def_chains (pi->loop_index_name,
|
||
+ associate_name_with_var,
|
||
+ pi->promoted_var, false);
|
||
+ }
|
||
+
|
||
+ /* Continue traversal. */
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* Rebuild T with newly promoted variables; STMT is the original
|
||
+ statement in which T appeared and may be equivalent to T. TYPE is
|
||
+ non-null when rebuilding the rhs of a GIMPLE_ASSIGN and indicates the
|
||
+ type of the lhs. */
|
||
+
|
||
+static tree
|
||
+rebuild_tree_with_promotion (tree t, gimple stmt, tree type,
|
||
+ gimple_stmt_iterator gsi,
|
||
+ struct promote_info *pi)
|
||
+{
|
||
+ tree op0, op1;
|
||
+
|
||
+ switch (TREE_CODE (t))
|
||
+ {
|
||
+ case NOP_EXPR:
|
||
+ case CONVERT_EXPR:
|
||
+ {
|
||
+ tree pvar = rebuild_tree_with_promotion (TREE_OPERAND (t, 0), stmt, type, gsi, pi);
|
||
+
|
||
+ if (types_compatible_p (type, TREE_TYPE (pvar)))
|
||
+ return pvar;
|
||
+ else
|
||
+ return build1 (TREE_CODE (t), type, pvar);
|
||
+ }
|
||
+ case INTEGER_CST:
|
||
+ {
|
||
+ return build_int_cst_wide (pi->promoted_type,
|
||
+ TREE_INT_CST_LOW (t),
|
||
+ TREE_INT_CST_HIGH (t));
|
||
+ }
|
||
+ case COND_EXPR:
|
||
+ {
|
||
+ tree orig_op0 = TREE_OPERAND (t, 0);
|
||
+ op0 = rebuild_tree_with_promotion (orig_op0, stmt, type, gsi, pi);
|
||
+ gcc_assert (orig_op0 != op0);
|
||
+ TREE_OPERAND (t, 0) = op0;
|
||
+ return t;
|
||
+ }
|
||
+ case PLUS_EXPR:
|
||
+ case MINUS_EXPR:
|
||
+ case MULT_EXPR:
|
||
+ type = pi->promoted_type;
|
||
+ goto binary_expr;
|
||
+ case EQ_EXPR:
|
||
+ case NE_EXPR:
|
||
+ case LT_EXPR:
|
||
+ case LE_EXPR:
|
||
+ case GT_EXPR:
|
||
+ case GE_EXPR:
|
||
+ type = TREE_TYPE (t);
|
||
+ binary_expr:
|
||
+ op0 = TREE_OPERAND (t, 0);
|
||
+ op1 = TREE_OPERAND (t, 1);
|
||
+ op0 = rebuild_tree_with_promotion (op0, stmt, type, gsi, pi);
|
||
+ op1 = rebuild_tree_with_promotion (op1, stmt, type, gsi, pi);
|
||
+ return build2 (TREE_CODE (t), type, op0, op1);
|
||
+ case SSA_NAME:
|
||
+ {
|
||
+ void **p = pointer_map_contains (variable_map, t);
|
||
+
|
||
+ if (p == NULL)
|
||
+ {
|
||
+ /* This is unexpected, but it does happen if we were dealing
|
||
+ with COND_EXPRs and such. Just go ahead and create a
|
||
+ temporary for it. */
|
||
+ if (types_compatible_p (TREE_TYPE (t), pi->promoted_type)
|
||
+ || SSA_NAME_DEF_STMT (t) == stmt)
|
||
+ return t;
|
||
+ else
|
||
+ goto insert_cast;
|
||
+ }
|
||
+ else
|
||
+ return *(tree *)p;
|
||
+ }
|
||
+ case VAR_DECL:
|
||
+ return t;
|
||
+ default:
|
||
+ insert_cast:
|
||
+ {
|
||
+ gimple cast;
|
||
+ tree tmp, nop;
|
||
+ tree to_upcast = t;
|
||
+
|
||
+ /* If we are dealing with a memory reference, then we can't
+ wrap it in a NOP_EXPR; we need to load the value from memory
+ first, then convert it. */
|
||
+ if (!is_gimple_reg (to_upcast))
|
||
+ {
|
||
+ tree tmp = create_pli_var (TREE_TYPE (to_upcast),
|
||
+ CONST_CAST (char *, "loadtmp"));
|
||
+ gimple stmt = gimple_build_assign (tmp, to_upcast);
|
||
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
||
+ to_upcast = tmp;
|
||
+ }
|
||
+
|
||
+ tmp = create_pli_var (pi->promoted_type,
|
||
+ CONST_CAST (char *, "promotetmp"));
|
||
+ nop = build1 (NOP_EXPR, pi->promoted_type, to_upcast);
|
||
+ cast = gimple_build_assign (tmp, nop);
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Inserting cast ");
|
||
+ print_gimple_stmt (dump_file, cast, 0, 0);
|
||
+ fprintf (dump_file, " prior to ");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+ gsi_insert_before (&gsi, cast, GSI_SAME_STMT);
|
||
+ return tmp;
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Split E and place STMT in the block created by doing so. */
|
||
+
|
||
+static void
|
||
+insert_along_edge (gimple stmt, edge e)
|
||
+{
|
||
+ basic_block bb = split_edge (e);
|
||
+
|
||
+ gimple_set_bb (stmt, bb);
|
||
+ set_bb_seq (bb, gimple_seq_alloc_with_stmt (stmt));
|
||
+}
|
||
+
|
||
+/* Rebuild STMT, which contains uses or a def of the promotable variable
|
||
+ associated with PI. */
|
||
+
|
||
+static void
|
||
+rebuild_with_promotion (gimple stmt, struct promote_info *pi)
|
||
+{
|
||
+ gimple_stmt_iterator gsi;
|
||
+
|
||
+ if (pointer_set_insert (promoted_stmts, stmt))
|
||
+ return;
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Rebuilding stmt ");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+
|
||
+ gsi = gsi_for_stmt (stmt);
|
||
+
|
||
+ switch (gimple_code (stmt))
|
||
+ {
|
||
+ case GIMPLE_ASSIGN:
|
||
+ {
|
||
+ enum tree_code subcode = gimple_assign_rhs_code (stmt);
|
||
+ enum tree_code newcode = subcode;
|
||
+ tree lhs = gimple_assign_lhs (stmt);
|
||
+ tree rhs1 = gimple_assign_rhs1 (stmt);
|
||
+ tree rhs2 = gimple_assign_rhs2 (stmt);
|
||
+ tree x, y;
|
||
+ void **v;
|
||
+
|
||
+ /* If we are defining a promotable variable, check for special
|
||
+ idioms. */
|
||
+ v = pointer_map_contains (variable_map, lhs);
|
||
+ if (v != NULL
|
||
+ && *(tree *)v == pi->promoted_var
|
||
+ && (subcode == NOP_EXPR || subcode == CONVERT_EXPR)
|
||
+ && signed_arithmetic_overflow_idiom_p (rhs1, &x, &y))
|
||
+ {
|
||
+ void **xp;
|
||
+ void **yp;
|
||
+ if (TYPE_PRECISION (TREE_TYPE (rhs1))
|
||
+ >= TYPE_PRECISION (pi->promoted_type))
|
||
+ goto done;
|
||
+
|
||
+ /* It's possible that we've already promoted the operands of
|
||
+ one or both of the NOP_EXPRs. In that case, we can
|
||
+ bypass the logic below and go straight to rebuilding the
|
||
+ rhs that we really want to transform. */
|
||
+ if (TREE_CODE (x) == VAR_DECL
|
||
+ || TREE_CODE (y) == VAR_DECL)
|
||
+ goto build_fake;
|
||
+ xp = pointer_map_contains (variable_map, x);
|
||
+ yp = pointer_map_contains (variable_map, y);
|
||
+
|
||
+ /* Nothing to see here. */
|
||
+ if (!types_compatible_p (TREE_TYPE (x),
|
||
+ TREE_TYPE (y))
|
||
+ || (xp == NULL && yp == NULL))
|
||
+ goto done;
|
||
+ x = (xp == NULL ? NULL_TREE : *(tree *)xp);
|
||
+ y = (yp == NULL ? NULL_TREE : *(tree *)yp);
|
||
+
|
||
+ if (x != pi->promoted_var && y != pi->promoted_var)
|
||
+ goto done;
|
||
+
|
||
+ build_fake:
|
||
+ newcode = PLUS_EXPR;
|
||
+ rhs1 = x;
|
||
+ rhs2 = y;
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Substituting ");
|
||
+ print_generic_expr (dump_file, x, 0);
|
||
+ fprintf (dump_file, " + ");
|
||
+ print_generic_expr (dump_file, y, 0);
|
||
+ fprintf (dump_file, " for rhs of original statement\n");
|
||
+ }
|
||
+
|
||
+ done:
|
||
+ ;
|
||
+ }
|
||
+
|
||
+ lhs = rebuild_tree_with_promotion (lhs, stmt, NULL, gsi, pi);
|
||
+ rhs1 = rebuild_tree_with_promotion (rhs1, stmt, NULL, gsi, pi);
|
||
+ if (rhs2)
|
||
+ rhs2 = rebuild_tree_with_promotion (rhs2, stmt, NULL, gsi, pi);
|
||
+
|
||
+ if (newcode != subcode)
|
||
+ {
|
||
+ gimple newstmt = gimple_build_assign_with_ops (newcode,
|
||
+ lhs, rhs1, rhs2);
|
||
+ gsi_replace (&gsi, newstmt, true);
|
||
+ stmt = newstmt;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ gimple_assign_set_lhs (stmt, lhs);
|
||
+ gimple_assign_set_rhs1 (stmt, rhs1);
|
||
+ if (rhs2)
|
||
+ gimple_assign_set_rhs2 (stmt, rhs2);
|
||
+ }
|
||
+ }
|
||
+ break;
|
||
+ case GIMPLE_COND:
|
||
+ {
|
||
+ tree lhs = gimple_cond_lhs (stmt);
|
||
+ tree rhs = gimple_cond_rhs (stmt);
|
||
+
|
||
+ lhs = rebuild_tree_with_promotion (lhs, stmt, NULL, gsi, pi);
|
||
+ rhs = rebuild_tree_with_promotion (rhs, stmt, NULL, gsi, pi);
|
||
+
|
||
+ gimple_cond_set_lhs (stmt, lhs);
|
||
+ gimple_cond_set_rhs (stmt, rhs);
|
||
+ }
|
||
+ break;
|
||
+ case GIMPLE_PHI:
|
||
+ {
|
||
+ unsigned int i;
|
||
+ bool promoted_result = could_be_promoted (PHI_RESULT (stmt));
|
||
+
|
||
+ for (i = 0; i < gimple_phi_num_args (stmt); i++)
|
||
+ {
|
||
+ tree var = gimple_phi_arg_def (stmt, i);
|
||
+ edge e = gimple_phi_arg_edge (stmt, i);
|
||
+ gimple assign = NULL;
|
||
+
|
||
+ if (TREE_CODE (var) == INTEGER_CST && promoted_result)
|
||
+ {
|
||
+ tree cst = build_int_cst_wide (pi->promoted_type,
|
||
+ TREE_INT_CST_LOW (var),
|
||
+ TREE_INT_CST_HIGH (var));
|
||
+
|
||
+ assign = gimple_build_assign (pi->promoted_var, cst);
|
||
+ insert_along_edge (assign, e);
|
||
+ }
|
||
+ else if (TREE_CODE (var) == SSA_NAME
|
||
+ && SSA_NAME_VAR (var) == pi->var_decl
|
||
+ && !promoted_result)
|
||
+ {
|
||
+ tree t = create_pli_var (TREE_TYPE (PHI_RESULT (stmt)),
|
||
+ CONST_CAST (char *, "promotetmp"));
|
||
+ tree name;
|
||
+ assign = gimple_build_assign_with_ops (CONVERT_EXPR,
|
||
+ t, pi->promoted_var,
|
||
+ NULL_TREE);
|
||
+
|
||
+ name = make_ssa_name (t, assign);
|
||
+ gimple_assign_set_lhs (assign, name);
|
||
+
|
||
+ insert_along_edge (assign, e);
|
||
+ SET_PHI_ARG_DEF (stmt, i, name);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ break;
|
||
+ default:
|
||
+ gcc_unreachable ();
|
||
+ }
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Converted stmt ");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n\n");
|
||
+ }
|
||
+ update_stmt (stmt);
|
||
+}
|
||
+
|
||
+/* Helper function for promote_variable that walks over use/def
|
||
+ chains. */
|
||
+
|
||
+static bool
|
||
+promote_variable_1 (tree var, gimple stmt, void *data)
|
||
+{
|
||
+ struct promote_info *pi = (struct promote_info *) data;
|
||
+ imm_use_iterator imi;
|
||
+ gimple use_stmt;
|
||
+
|
||
+ rebuild_with_promotion (stmt, pi);
|
||
+
|
||
+ if (gimple_code (stmt) == GIMPLE_PHI)
|
||
+ var = PHI_RESULT (stmt);
|
||
+
|
||
+ if (could_be_promoted (var))
|
||
+ FOR_EACH_IMM_USE_STMT (use_stmt, imi, var)
|
||
+ {
|
||
+ rebuild_with_promotion (use_stmt, pi);
|
||
+ }
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
+/* Convert all uses and defs of PI_->LOOP_INDEX_NAME as linked by
|
||
+ use-def chains to uses and defs of PI_->PROMOTED_VAR. */
|
||
+
|
||
+static bool
|
||
+promote_variable (const void *pi_, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ const struct promote_info *pi = (const struct promote_info *) pi_;
|
||
+
|
||
+ if (pi->can_be_promoted_p)
|
||
+ {
|
||
+ walk_use_def_chains (pi->loop_index_name, promote_variable_1,
|
||
+ CONST_CAST (struct promote_info *, pi), false);
|
||
+ }
|
||
+
|
||
+ /* Continue traversal. */
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* Free PI_ and its associated data. */
|
||
+
|
||
+static bool
|
||
+free_pi_entries (const void *pi_, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ struct promote_info *pi = CONST_CAST (struct promote_info *,
|
||
+ (const struct promote_info *) pi_);
|
||
+
|
||
+ VEC_free (tree, heap, pi->cast_types);
|
||
+ VEC_free (int, heap, pi->cast_counts);
|
||
+ free (pi);
|
||
+
|
||
+ /* Continue traversal. */
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* Collect information about variables that we believe to be loop
|
||
+ indices in PROMOTION_INFO. */
|
||
+
|
||
+static void
|
||
+collect_promotion_candidates (void)
|
||
+{
|
||
+ loop_iterator li;
|
||
+ struct loop *loop;
|
||
+
|
||
+ FOR_EACH_LOOP (li, loop, 0)
|
||
+ {
|
||
+ basic_block header = loop->header;
|
||
+ gimple exit_cond = last_stmt (header);
|
||
+
|
||
+ if (exit_cond && gimple_code (exit_cond) == GIMPLE_COND)
|
||
+ {
|
||
+ tree loop_index;
|
||
+ tree limit = NULL_TREE;
|
||
+ tree decl;
|
||
+ struct promote_info *pi;
|
||
+
|
||
+ loop_index = find_promotion_candidate (loop, exit_cond, &limit);
|
||
+ if (loop_index == NULL_TREE)
|
||
+ continue;
|
||
+ decl = SSA_NAME_VAR (loop_index);
|
||
+ if (TREE_ADDRESSABLE (decl))
|
||
+ continue;
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Found loop index ");
|
||
+ print_generic_expr (dump_file, loop_index, 0);
|
||
+ fprintf (dump_file, " involved in ");
|
||
+ print_gimple_stmt (dump_file, exit_cond, 0, 0);
|
||
+ fprintf (dump_file, "\n\n");
|
||
+ }
|
||
+
|
||
+ pi = XCNEW (struct promote_info);
|
||
+ pi->loop = loop;
|
||
+ pi->exit_expr = exit_cond;
|
||
+ pi->loop_index_name = loop_index;
|
||
+ pi->loop_limit = limit;
|
||
+ pi->var_decl = decl;
|
||
+ /* We think so, anyway... */
|
||
+ pi->can_be_promoted_p = true;
|
||
+ pointer_set_insert (promotion_info, pi);
|
||
+ }
|
||
+ else if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "\nSkipping analysis of loop %d (header bb #%d)\n",
|
||
+ loop->num, loop->header->index);
|
||
+ if (exit_cond)
|
||
+ {
|
||
+ fprintf (dump_file, "Exit condition was ");
|
||
+ print_gimple_stmt (dump_file, exit_cond, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Free memory associated with global variables that we used. */
|
||
+
|
||
+static void
|
||
+pli_cleanup (void)
|
||
+{
|
||
+ if (promoted_stmts)
|
||
+ {
|
||
+ pointer_set_destroy (promoted_stmts);
|
||
+ promoted_stmts = NULL;
|
||
+ }
|
||
+ if (variable_map)
|
||
+ {
|
||
+ pointer_map_destroy (variable_map);
|
||
+ variable_map = NULL;
|
||
+ }
|
||
+ if (promotable_names)
|
||
+ {
|
||
+ pointer_set_destroy (promotable_names);
|
||
+ promotable_names = NULL;
|
||
+ }
|
||
+ if (promotion_info)
|
||
+ {
|
||
+ pointer_set_traverse (promotion_info, free_pi_entries, NULL);
|
||
+ pointer_set_destroy (promotion_info);
|
||
+ promotion_info = NULL;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* The guts of the pass. */
|
||
+
|
||
+static unsigned int
|
||
+promote_short_indices (void)
|
||
+{
|
||
+ bool did_something = false;
|
||
+ bool changed;
|
||
+ size_t max_iterations, i, n_promoted;
|
||
+
|
||
+ promotion_info = pointer_set_create ();
|
||
+ collect_promotion_candidates ();
|
||
+
|
||
+ if (dump_file)
|
||
+ fprintf (dump_file, "Found %d candidates for promotion\n",
|
||
+ (int) pointer_set_n_elements (promotion_info));
|
||
+
|
||
+ /* Nothing to do. */
|
||
+ if (pointer_set_n_elements (promotion_info) == 0)
|
||
+ goto cleanup;
|
||
+
|
||
+ /* We have information about which variables are loop index variables.
|
||
+ We now need to determine the promotability of the loop indices.
|
||
+ Since the promotability of loop indices may depend on other loop
|
||
+ indices, we need to repeat this until we reach a fixed point. */
|
||
+ changed = true;
|
||
+ max_iterations = pointer_set_n_elements (promotion_info);
|
||
+ i = 0;
|
||
+
|
||
+ promotable_names = pointer_set_create ();
|
||
+
|
||
+ while (changed)
|
||
+ {
|
||
+ changed = false;
|
||
+ pointer_set_clear (promotable_names);
|
||
+ pointer_set_traverse (promotion_info, add_variable,
|
||
+ promotable_names);
|
||
+ n_promoted = pointer_set_n_elements (promotable_names);
|
||
+
|
||
+ if (dump_file)
|
||
+ fprintf (dump_file, "\nIteration %d, have %d variables to consider\n",
|
||
+ (int) i, (int) n_promoted);
|
||
+
|
||
+ if (n_promoted == 0)
|
||
+ break;
|
||
+ gcc_assert (i < max_iterations);
|
||
+ pointer_set_traverse (promotion_info, analyze_loop, &changed);
|
||
+ i++;
|
||
+ }
|
||
+
|
||
+ if (dump_file)
|
||
+ fprintf (dump_file, "Promoting %d variables\n",
|
||
+ (int) n_promoted);
|
||
+
|
||
+ if (n_promoted != 0)
|
||
+ {
|
||
+ did_something = true;
|
||
+ variable_map = pointer_map_create ();
|
||
+ promoted_stmts = pointer_set_create ();
|
||
+ pointer_set_traverse (promotion_info, create_promoted_variable, NULL);
|
||
+ pointer_set_traverse (promotion_info, promote_variable, NULL);
|
||
+ }
|
||
+
|
||
+ cleanup:
|
||
+ pli_cleanup ();
|
||
+ return did_something ? TODO_update_ssa : 0;
|
||
+}
|
||
+
|
||
+/* Entry point for the short loop index promotion pass. */
|
||
+
|
||
+static unsigned int
|
||
+tree_short_index_promotion (void)
|
||
+{
|
||
+ unsigned int changed = 0;
|
||
+
|
||
+ /* Initialize all the necessary loop infrastructure. */
|
||
+ loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES | LOOPS_HAVE_RECORDED_EXITS);
|
||
+ add_noreturn_fake_exit_edges ();
|
||
+ connect_infinite_loops_to_exit ();
|
||
+
|
||
+ if (number_of_loops () > 1)
|
||
+ changed = promote_short_indices ();
|
||
+
|
||
+ /* Tear down loop optimization infrastructure. */
|
||
+ remove_fake_exit_edges ();
|
||
+ free_numbers_of_iterations_estimates ();
|
||
+ loop_optimizer_finalize ();
|
||
+
|
||
+ return changed;
|
||
+}
|
||
+
|
||
+static bool
|
||
+gate_short_index_promotion (void)
|
||
+{
|
||
+ return optimize > 0 && flag_promote_loop_indices;
|
||
+}
|
||
+
|
||
+struct gimple_opt_pass pass_promote_indices =
|
||
+{
|
||
+ {
|
||
+ GIMPLE_PASS,
|
||
+ "promoteshort", /* name */
|
||
+ gate_short_index_promotion, /* gate */
|
||
+ tree_short_index_promotion, /* execute */
|
||
+ NULL, /* sub */
|
||
+ NULL, /* next */
|
||
+ 0, /* static_pass_number */
|
||
+ TV_TREE_LOOP_PROMOTE, /* tv_id */
|
||
+ PROP_cfg | PROP_ssa, /* properties_required */
|
||
+ 0, /* properties_provided */
|
||
+ 0, /* properties_destroyed */
|
||
+ 0, /* todo_flags_start */
|
||
+ TODO_dump_func | TODO_verify_loops
|
||
+ | TODO_ggc_collect /* todo_flags_finish */
|
||
+ }
|
||
+};
|
||
--- a/gcc/tree-ssa-pre.c
|
||
+++ b/gcc/tree-ssa-pre.c
|
||
@@ -104,6 +104,10 @@
|
||
In order to make it fully redundant, we insert the expression into
|
||
the predecessors where it is not available, but is ANTIC.
|
||
|
||
+ When optimizing for size, we only eliminate the partial redundancy
|
||
+ if we need to insert in only one predecessor. This avoids almost
|
||
+ completely the code size increase that PRE usually causes.
|
||
+
|
||
For the partial anticipation case, we only perform insertion if it
|
||
is partially anticipated in some block, and fully available in all
|
||
of the predecessors.
|
||
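As a concrete, hypothetical illustration of the single-insertion case described in the comment added above: with two predecessors and the value already available on one of them, only one insertion is needed, so the partial redundancy is still removed when optimizing for size.

/* `a + b' is computed on the then-edge only; inserting it once on the
   else-edge makes the use below fully redundant, which the -Os
   heuristic still allows.  */
int
f (int a, int b, int p)
{
  int x;

  if (p)
    x = a + b;           /* a + b available from this predecessor */
  else
    x = 0;

  return x + (a + b);    /* fully redundant after one insertion */
}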
@@ -429,6 +433,7 @@
|
||
static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr);
|
||
static void bitmap_value_replace_in_set (bitmap_set_t, pre_expr);
|
||
static void bitmap_set_copy (bitmap_set_t, bitmap_set_t);
|
||
+static void bitmap_set_and (bitmap_set_t, bitmap_set_t);
|
||
static bool bitmap_set_contains_value (bitmap_set_t, unsigned int);
|
||
static void bitmap_insert_into_set (bitmap_set_t, pre_expr);
|
||
static void bitmap_insert_into_set_1 (bitmap_set_t, pre_expr, bool);
|
||
@@ -2988,13 +2993,6 @@
|
||
tree temp;
|
||
gimple phi;
|
||
|
||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||
- {
|
||
- fprintf (dump_file, "Found partial redundancy for expression ");
|
||
- print_pre_expr (dump_file, expr);
|
||
- fprintf (dump_file, " (%04d)\n", val);
|
||
- }
|
||
-
|
||
/* Make sure we aren't creating an induction variable. */
|
||
if (block->loop_depth > 0 && EDGE_COUNT (block->preds) == 2
|
||
&& expr->kind != REFERENCE)
|
||
@@ -3192,6 +3190,47 @@
|
||
}
|
||
|
||
|
||
+/* Indicate if, when optimizing for speed, it is appropriate to make
|
||
+ INSERTS_NEEDED insertions in order to make EXPR in BLOCK redundant. */
|
||
+static bool
|
||
+ppre_n_insert_for_speed_p (pre_expr expr, basic_block block,
|
||
+ unsigned int inserts_needed)
|
||
+{
|
||
+ /* The more expensive EXPR is, the more we should be prepared to insert
|
||
+ in the predecessors of BLOCK to make EXPR fully redundant.
|
||
+ For now, only recognize AND, OR, XOR, PLUS and MINUS of a multiple-use
|
||
+ SSA_NAME with a constant as cheap. */
|
||
+ int cost;
|
||
+
|
||
+ if (flag_tree_pre_partial_partial_obliviously)
|
||
+ return true;
|
||
+ if (expr->kind == NARY)
|
||
+ {
|
||
+ vn_nary_op_t nary = PRE_EXPR_NARY (expr);
|
||
+ switch (nary->opcode)
|
||
+ {
|
||
+ tree name, cnst;
|
||
+ case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR:
|
||
+ case PLUS_EXPR: case MINUS_EXPR:
|
||
+
|
||
+ gcc_assert (nary->length == 2);
|
||
+ name = nary->op[0];
|
||
+ cnst = nary->op[1];
|
||
+ if (TREE_CODE (name) != SSA_NAME || has_single_use (name))
|
||
+ return true;
|
||
+ if (!is_gimple_min_invariant (cnst))
|
||
+ return true;
|
||
+ cost = 1;
|
||
+ break;
|
||
+ default:
|
||
+ return true;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ return true;
|
||
+ return EDGE_COUNT (block->preds) * cost >= inserts_needed;
|
||
+
|
||
+}
|
||
|
||
/* Perform insertion of partially redundant values.
|
||
For BLOCK, do the following:
|
||
@@ -3226,6 +3265,7 @@
|
||
pre_expr *avail;
|
||
unsigned int val;
|
||
bool by_some = false;
|
||
+ unsigned int inserts_needed = 0;
|
||
bool cant_insert = false;
|
||
bool all_same = true;
|
||
pre_expr first_s = NULL;
|
||
@@ -3280,6 +3320,7 @@
|
||
{
|
||
avail[bprime->index] = eprime;
|
||
all_same = false;
|
||
+ inserts_needed++;
|
||
}
|
||
else
|
||
{
|
||
@@ -3289,6 +3330,11 @@
|
||
first_s = edoubleprime;
|
||
else if (!pre_expr_eq (first_s, edoubleprime))
|
||
all_same = false;
|
||
+ /* If the available value is not a NAME, PREing this
|
||
+ value will probably result in a copy on the edge
|
||
+ to assign the expression to a register. */
|
||
+ if (edoubleprime->kind != NAME)
|
||
+ inserts_needed++;
|
||
}
|
||
}
|
||
/* If we can insert it, it's not the same value
|
||
@@ -3297,9 +3343,27 @@
|
||
partially redundant. */
|
||
if (!cant_insert && !all_same && by_some && dbg_cnt (treepre_insert))
|
||
{
|
||
- if (insert_into_preds_of_block (block, get_expression_id (expr),
|
||
- avail))
|
||
- new_stuff = true;
|
||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
+ {
|
||
+ fprintf (dump_file,
|
||
+ "Found partial redundancy for expression ");
|
||
+ print_pre_expr (dump_file, expr);
|
||
+ fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr));
|
||
+ }
|
||
+
|
||
+ /* If optimizing for size, insert at most one
|
||
+ new expression to avoid increasing code size. */
|
||
+ if (optimize_function_for_speed_p (cfun)
|
||
+ ? 1 : EDGE_COUNT (block->preds) - inserts_needed == 1)
|
||
+ new_stuff |=
|
||
+ insert_into_preds_of_block (block,
|
||
+ get_expression_id (expr),
|
||
+ avail);
|
||
+ else if (dump_file && (dump_flags & TDF_DETAILS))
|
||
+ fprintf (dump_file, "Not inserting (optimizing for %s)\n",
|
||
+ optimize_function_for_speed_p (cfun)
|
||
+ ? "speed" : "size");
|
||
+
|
||
}
|
||
/* If all edges produce the same value and that value is
|
||
an invariant, then the PHI has the same value on all
|
||
@@ -3428,9 +3492,28 @@
|
||
if (!cant_insert && by_all && dbg_cnt (treepre_insert))
|
||
{
|
||
pre_stats.pa_insert++;
|
||
- if (insert_into_preds_of_block (block, get_expression_id (expr),
|
||
- avail))
|
||
- new_stuff = true;
|
||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
+ {
|
||
+ fprintf (dump_file,
|
||
+ "Found partial redundancy for expression ");
|
||
+ print_pre_expr (dump_file, expr);
|
||
+ fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr));
|
||
+ }
|
||
+ /* Assuming the expression is 50% anticipatable, we have to
|
||
+ multiply the number of insertions needed by two for a cost
|
||
+ comparison. */
|
||
+ if (!optimize_function_for_speed_p (cfun)
|
||
+ || ppre_n_insert_for_speed_p (expr, block,
|
||
+ 2 * EDGE_COUNT (block->preds)))
|
||
+ new_stuff |=
|
||
+ insert_into_preds_of_block (block,
|
||
+ get_expression_id (expr),
|
||
+ avail);
|
||
+ else if (dump_file && (dump_flags & TDF_DETAILS))
|
||
+ fprintf (dump_file, "Not inserting (optimizing for %s)\n",
|
||
+ optimize_function_for_speed_p (cfun)
|
||
+ ? "speed" : "size");
|
||
+
|
||
}
|
||
free (avail);
|
||
}
|
||
@@ -3471,7 +3554,9 @@
|
||
if (!single_pred_p (block))
|
||
{
|
||
new_stuff |= do_regular_insertion (block, dom);
|
||
- if (do_partial_partial)
|
||
+ /* Don't bother with partial-partial redundancies when
|
||
+ optimizing for size. */
|
||
+ if (do_partial_partial && ! optimize_function_for_size_p (cfun))
|
||
new_stuff |= do_partial_partial_insertion (block, dom);
|
||
}
|
||
}
|
||
@@ -4220,11 +4305,11 @@
|
||
only wants to do full redundancy elimination. */
|
||
|
||
static unsigned int
|
||
-execute_pre (bool do_fre ATTRIBUTE_UNUSED)
|
||
+execute_pre (bool do_fre)
|
||
{
|
||
unsigned int todo = 0;
|
||
|
||
- do_partial_partial = optimize > 2;
|
||
+ do_partial_partial = flag_tree_pre_partial_partial;
|
||
|
||
/* This has to happen before SCCVN runs because
|
||
loop_optimizer_init may create new phis, etc. */
|
||
@@ -4297,19 +4382,20 @@
|
||
return todo;
|
||
}
|
||
|
||
-/* Gate and execute functions for PRE. */
|
||
+/* Gate and execute functions for FRE/PRE. */
|
||
|
||
static unsigned int
|
||
do_pre (void)
|
||
{
|
||
- return TODO_rebuild_alias | execute_pre (false);
|
||
+ return TODO_rebuild_alias
|
||
+ | execute_pre (! flag_tree_pre);
|
||
}
|
||
|
||
static bool
|
||
gate_pre (void)
|
||
{
|
||
- /* PRE tends to generate bigger code. */
|
||
- return flag_tree_pre != 0 && optimize_function_for_speed_p (cfun);
|
||
+ /* Run FRE even if we don't run PRE. */
|
||
+ return (flag_tree_fre || flag_tree_pre);
|
||
}
|
||
|
||
struct gimple_opt_pass pass_pre =
|
||
--- /dev/null
|
||
+++ b/gcc/tree-ssa-remove-local-statics.c
|
||
@@ -0,0 +1,868 @@
|
||
+/* Local static variable elimination pass.
|
||
+ Copyright (C) 2007 Free Software Foundation, Inc.
|
||
+ Contributed by Nathan Froyd <froydnj@codesourcery.com>
|
||
+
|
||
+This file is part of GCC.
|
||
+
|
||
+GCC is free software; you can redistribute it and/or modify it
|
||
+under the terms of the GNU General Public License as published by the
|
||
+Free Software Foundation; either version 3, or (at your option) any
|
||
+later version.
|
||
+
|
||
+GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
+for more details.
|
||
+
|
||
+You should have received a copy of the GNU General Public License
|
||
+along with GCC; see the file COPYING3. If not see
|
||
+<http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* Converting static function-local variables to automatic variables.
|
||
+
|
||
+ The motivating example is a function like:
|
||
+
|
||
+ void
|
||
+ foo (unsigned n)
|
||
+ {
|
||
+ static int var;
|
||
+ unsigned i;
|
||
+
|
||
+ for (i = 0; i != n; i++)
|
||
+ {
|
||
+ var = ...
|
||
+
|
||
+ do other things with var...
|
||
+ }
|
||
+ }
|
||
+
|
||
+ Because VAR is static, doing things like code motion to loads and
|
||
+ stores of VAR is difficult. Furthermore, accesses to VAR are
|
||
+ inefficient. This pass aims to recognize the cases where it is not
|
||
+ necessary for VAR to be static and modify the code so that later
|
||
+ passes will do the appropriate optimizations.
|
||
+
|
||
+ The criteria for a static function-local variable V in a function F
|
||
+ being converted to an automatic variable are:
|
||
+
|
||
+ 1. F does not call setjmp; and
|
||
+ 2. V's address is never taken; and
|
||
+ 3. V is not declared volatile; and
|
||
+ 4. V is not used in any nested function; and
|
||
+ 5. V is not an aggregate value (union, struct, array, etc.); and
|
||
+ 6. Every use of V is defined along all paths leading to the use.
|
||
+
|
||
+ NOTE: For ease of implementation, we currently treat a function call
|
||
+ as killing all previous definitions of static variables, since we
|
||
+ could have:
|
||
+
|
||
+ static void
|
||
+ foo (...)
|
||
+ {
|
||
+ static int x;
|
||
+
|
||
+ x = ...; (1)
|
||
+
|
||
+ f (...); (2)
|
||
+
|
||
+ ... = x; (3)
|
||
+ }
|
||
+
|
||
+ The use at (3) needs to pick up a possible definition made by the
|
||
+ call at (2). If the call at (2) does not call back into 'foo',
|
||
+ then the call is not a killing call. We currently treat it as
|
||
+ though it is. */
|
||
+
|
||
+#include "config.h"
|
||
+#include "system.h"
|
||
+#include "coretypes.h"
|
||
+#include "tm.h"
|
||
+
|
||
+#include "rtl.h"
|
||
+#include "tm_p.h"
|
||
+#include "hard-reg-set.h"
|
||
+#include "obstack.h"
|
||
+#include "basic-block.h"
|
||
+
|
||
+#include "tree.h"
|
||
+#include "gimple.h"
|
||
+#include "hashtab.h"
|
||
+#include "diagnostic.h"
|
||
+#include "tree-flow.h"
|
||
+#include "tree-dump.h"
|
||
+#include "flags.h"
|
||
+#include "timevar.h"
|
||
+#include "tree-pass.h"
|
||
+
|
||
+struct rls_decl_info
|
||
+{
|
||
+ /* The variable declaration. */
|
||
+ tree orig_var;
|
||
+
|
||
+ /* Its index in rls_block_local_data. */
|
||
+ int index;
|
||
+
|
||
+ /* Whether we can optimize this variable. */
|
||
+ bool optimizable_p;
|
||
+
|
||
+ /* The new variable declaration, if we can optimize away the staticness
|
||
+ of 'orig_var'. */
|
||
+ tree new_var;
|
||
+};
|
||
+
|
||
+/* Filled with 'struct rls_decl_info'; keyed off ORIG_VAR. */
|
||
+static htab_t static_variables;
|
||
+
|
||
+struct rls_stmt_info
|
||
+{
|
||
+ /* The variable declaration. */
|
||
+ tree var;
|
||
+
|
||
+ /* The statement in which we found a def or a use of the variable. */
|
||
+ gimple stmt;
|
||
+
|
||
+ /* Whether STMT represents a use of VAR. */
|
||
+ bool use_p;
|
||
+
|
||
+ /* A bitmap whose entries denote what variables have been defined
|
||
+ when execution arrives at STMT. This field is only used when
|
||
+ USE_P is true. */
|
||
+ sbitmap defined;
|
||
+};
|
||
+
|
||
+/* Filled with 'struct rls_stmt_info'; keyed off STMT. */
|
||
+static htab_t defuse_statements;
|
||
+
|
||
+static struct
|
||
+{
|
||
+ /* The number of static variables we found. */
|
||
+ size_t n_statics;
|
||
+
|
||
+ /* The number of optimizable variables we found. */
|
||
+ size_t n_optimizable;
|
||
+} stats;
|
||
+
|
||
+struct rls_block_dataflow_data {
|
||
+ /* A bitmap whose entries denote what variables have been defined on
|
||
+ entry to this block. */
|
||
+ sbitmap defined_in;
|
||
+
|
||
+ /* A bitmap whose entries denote what variables have been defined on
|
||
+ exit from this block. */
|
||
+ sbitmap defined_out;
|
||
+};
|
||
+
|
||
+/* Parameters for the 'static_variables' hash table. */
|
||
+
|
||
+static hashval_t
|
||
+rls_hash_decl_info (const void *x)
|
||
+{
|
||
+ return htab_hash_pointer
|
||
+ ((const void *) ((const struct rls_decl_info *) x)->orig_var);
|
||
+}
|
||
+
|
||
+static int
|
||
+rls_eq_decl_info (const void *x, const void *y)
|
||
+{
|
||
+ const struct rls_decl_info *a = (const struct rls_decl_info *) x;
|
||
+ const struct rls_decl_info *b = (const struct rls_decl_info *) y;
|
||
+
|
||
+ return a->orig_var == b->orig_var;
|
||
+}
|
||
+
|
||
+static void
|
||
+rls_free_decl_info (void *info)
|
||
+{
|
||
+ free (info);
|
||
+}
|
||
+
|
||
+/* Parameters for the 'defuse_statements' hash table. */
|
||
+
|
||
+static hashval_t
|
||
+rls_hash_use_info (const void *x)
|
||
+{
|
||
+ return htab_hash_pointer
|
||
+ ((const void *) ((const struct rls_stmt_info *) x)->stmt);
|
||
+}
|
||
+
|
||
+static int
|
||
+rls_eq_use_info (const void *x, const void *y)
|
||
+{
|
||
+ const struct rls_stmt_info *a = (const struct rls_stmt_info *) x;
|
||
+ const struct rls_stmt_info *b = (const struct rls_stmt_info *) y;
|
||
+
|
||
+ return a->stmt == b->stmt;
|
||
+}
|
||
+
|
||
+static void
|
||
+rls_free_use_info (void *info)
|
||
+{
|
||
+ struct rls_stmt_info *stmt_info = (struct rls_stmt_info *) info;
|
||
+
|
||
+ if (stmt_info->defined)
|
||
+ sbitmap_free (stmt_info->defined);
|
||
+
|
||
+ free (stmt_info);
|
||
+}
|
||
+
|
||
+/* Initialize data structures and statistics. */
|
||
+
|
||
+static void
|
||
+rls_init (void)
|
||
+{
|
||
+ basic_block bb;
|
||
+
|
||
+ /* We expect relatively few static variables, hence the small
|
||
+ initial size for the hash table. */
|
||
+ static_variables = htab_create (8, rls_hash_decl_info,
|
||
+ rls_eq_decl_info, rls_free_decl_info);
|
||
+
|
||
+ /* We expect quite a few statements. */
|
||
+ defuse_statements = htab_create (128, rls_hash_use_info,
|
||
+ rls_eq_use_info, rls_free_use_info);
|
||
+
|
||
+ FOR_ALL_BB (bb)
|
||
+ {
|
||
+ struct rls_block_dataflow_data *data;
|
||
+
|
||
+ data = XNEW (struct rls_block_dataflow_data);
|
||
+ memset (data, 0, sizeof (*data));
|
||
+ bb->aux = data;
|
||
+ }
|
||
+
|
||
+ stats.n_statics = 0;
|
||
+ stats.n_optimizable = 0;
|
||
+}
|
||
+
|
||
+/* Free data structures. */
|
||
+
|
||
+static void
|
||
+rls_done (void)
|
||
+{
|
||
+ basic_block bb;
|
||
+
|
||
+ htab_delete (static_variables);
|
||
+ htab_delete (defuse_statements);
|
||
+
|
||
+ FOR_ALL_BB (bb)
|
||
+ {
|
||
+ struct rls_block_dataflow_data *data
|
||
+ = (struct rls_block_dataflow_data *) bb->aux;
|
||
+
|
||
+ gcc_assert (data);
|
||
+
|
||
+ if (data->defined_in)
|
||
+ sbitmap_free (data->defined_in);
|
||
+ if (data->defined_out)
|
||
+ sbitmap_free (data->defined_out);
|
||
+ free (data);
|
||
+ bb->aux = NULL;
|
||
+ }
|
||
+}
|
||
+
|
||
+
|
||
+/* Doing the initial work to find static variables. */
|
||
+
|
||
+/* Examine the defining statement for VAR and determine whether it is a
|
||
+ static variable we could potentially optimize. If so, stick in it
|
||
+ in the 'static_variables' hashtable.
|
||
+
|
||
+ STMT is the statement in which a definition or use of VAR occurs.
|
||
+ USE_P indicates whether VAR is used or defined in STMT. Enter STMT
|
||
+ into 'defuse_statements' as well for use during dataflow
|
||
+ analysis. */
|
||
+
|
||
+static void
|
||
+maybe_discover_new_declaration (tree var, gimple stmt, bool use_p)
|
||
+{
|
||
+ tree def_stmt = SSA_NAME_VAR (var);
|
||
+
|
||
+ if (TREE_CODE (def_stmt) == VAR_DECL
|
||
+ && DECL_CONTEXT (def_stmt) != NULL_TREE
|
||
+ && TREE_CODE (DECL_CONTEXT (def_stmt)) == FUNCTION_DECL
|
||
+ /* We cannot optimize away a static used in multiple functions (as
|
||
+ might happen in C++). */
|
||
+ && !DECL_NONLOCAL (def_stmt)
|
||
+ && TREE_STATIC (def_stmt)
|
||
+ /* We cannot optimize away aggregate statics, as we would have to
|
||
+ prove that definitions of every field of the aggregate dominate
|
||
+ uses. */
|
||
+ && !AGGREGATE_TYPE_P (TREE_TYPE (def_stmt))
|
||
+ /* GCC doesn't normally treat vectors as aggregates; we need to,
|
||
+ though, since a user could use intrinsics to read/write
|
||
+ particular fields of the vector, thereby treating it as an
|
||
+ array. */
|
||
+ && TREE_CODE (TREE_TYPE (def_stmt)) != VECTOR_TYPE
|
||
+ && !TREE_ADDRESSABLE (def_stmt)
|
||
+ && !TREE_THIS_VOLATILE (def_stmt))
|
||
+ {
|
||
+ struct rls_decl_info dummy;
|
||
+ void **slot;
|
||
+
|
||
+ dummy.orig_var = def_stmt;
|
||
+ slot = htab_find_slot (static_variables, &dummy, INSERT);
|
||
+
|
||
+ if (*slot == NULL)
|
||
+ {
|
||
+ /* Found a use or a def of a new declaration. */
|
||
+ struct rls_decl_info *info = XNEW (struct rls_decl_info);
|
||
+
|
||
+ info->orig_var = def_stmt;
|
||
+ info->index = stats.n_statics++;
|
||
+ /* Optimistically assume that we can optimize. */
|
||
+ info->optimizable_p = true;
|
||
+ info->new_var = NULL_TREE;
|
||
+ *slot = (void *) info;
|
||
+ }
|
||
+
|
||
+ /* Enter the statement into DEFUSE_STATEMENTS. */
|
||
+ {
|
||
+ struct rls_stmt_info dummy;
|
||
+ struct rls_stmt_info *info;
|
||
+
|
||
+ dummy.stmt = stmt;
|
||
+ slot = htab_find_slot (defuse_statements, &dummy, INSERT);
|
||
+
|
||
+ /* We should never insert the same statement into the
|
||
+ hashtable twice. */
|
||
+ gcc_assert (*slot == NULL
|
||
+ || ((struct rls_stmt_info *)(*slot))->stmt == stmt);
|
||
+
|
||
+ if (*slot != NULL && ((struct rls_stmt_info *)(*slot))->stmt == stmt)
|
||
+ return;
|
||
+
|
||
+ info = XNEW (struct rls_stmt_info);
|
||
+ info->var = def_stmt;
|
||
+ info->stmt = stmt;
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "entering as %s ", use_p ? "use" : "def");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_DETAILS | TDF_VOPS);
|
||
+ }
|
||
+ info->use_p = use_p;
|
||
+ /* We don't know how big to make the bitmap yet. */
|
||
+ info->defined = NULL;
|
||
+ *slot = (void *) info;
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
+/* Grovel through all the statements in the program, looking for
+   SSA_NAMEs whose SSA_NAME_VAR is a VAR_DECL.  We look at both use and
+   def SSA_NAMEs.  */
+
+static void
+find_static_nonvolatile_declarations (void)
+{
+  basic_block bb;
+
+  FOR_EACH_BB (bb)
+    {
+      gimple_stmt_iterator i;
+
+      for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+        {
+          tree var;
+          ssa_op_iter iter;
+          gimple stmt = gsi_stmt (i);
+
+          /* If there's a call expression in STMT, then previous passes
+             will have determined if the call transitively defines some
+             static variable.  However, we need more precise
+             information--we need to know whether static variables are
+             live out after the call.
+
+             Since we'll never see something like:
+
+               staticvar = foo (bar, baz);
+
+             in GIMPLE (the result of the call will be assigned to a
+             normal, non-static local variable which is then assigned to
+             STATICVAR in a subsequent statement), don't bother finding
+             new declarations if we see a GIMPLE_CALL.
+
+             In a similar fashion, asm statements that clobber memory
+             will appear to define static variables.  In general,
+             however, assuming that asm statements define static
+             variables would cause us to see that in the following
+             situation:
+
+               static int foo = 0;
+
+               __asm__ (... : "memory");
+               foo++;
+
+             foo could be unstaticized because the asm has "defined"
+             foo.  This is likely false.  (Even if the asm does happen
+             to define foo--and only foo--that situation would be
+             sufficiently unusual that not optimizing it seems OK.)  */
+          if (gimple_code (stmt) != GIMPLE_CALL
+              && gimple_code (stmt) != GIMPLE_ASM)
+            FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_VDEF)
+              {
+                maybe_discover_new_declaration (var, stmt, false);
+              }
+
+          FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_VUSE)
+            {
+              maybe_discover_new_declaration (var, stmt, true);
+            }
+        }
+    }
+}
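
To make the conditions above concrete, here is a small illustrative example; it is not part of the patch and the function and variable names are invented. A function-local static scalar whose definition dominates every use, and whose address is never taken, is exactly what this pass can rewrite into an ordinary automatic variable:

int
sum_squares (const int *v, int n)
{
  static int acc;        /* candidate: function-local, scalar, address
                            never taken, not volatile, not an aggregate.  */
  int i;

  acc = 0;               /* defined on every path before any use...  */
  for (i = 0; i < n; i++)
    acc += v[i] * v[i];  /* ...so all uses are covered and ACC can be
                            replaced by a non-static temporary.  */
  return acc;
}
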
+
|
||
+
|
||
+/* Determining if we have anything to optimize. */
|
||
+
|
||
+/* Examine *SLOT (which is a 'struct rls_decl_info *') to see whether
|
||
+ the associated variable is optimizable. If it is, create a new,
|
||
+ non-static declaration for the variable; this new variable will be
|
||
+ used during a subsequent rewrite of the function. */
|
||
+
|
||
+#define NEW_VAR_PREFIX ".unstatic"
|
||
+
|
||
+static int
|
||
+maybe_create_new_variable (void **slot, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ struct rls_decl_info *info = (struct rls_decl_info *) *slot;
|
||
+ tree id_node = DECL_NAME (info->orig_var);
|
||
+ size_t id_len = IDENTIFIER_LENGTH (id_node);
|
||
+ size_t name_len = id_len + strlen (NEW_VAR_PREFIX) + 1;
|
||
+ char *name;
|
||
+
|
||
+ /* Don't create a new variable multiple times. */
|
||
+ gcc_assert (!info->new_var);
|
||
+
|
||
+ /* Tie the new name to the old one to aid debugging dumps. */
|
||
+ name = (char *) alloca (name_len);
|
||
+ strcpy (name, IDENTIFIER_POINTER (id_node));
|
||
+ strcpy (name + id_len, NEW_VAR_PREFIX);
|
||
+ info->new_var = create_tmp_var (TREE_TYPE (info->orig_var), name);
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "new variable ");
|
||
+ print_generic_expr (dump_file, info->new_var, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+
|
||
+ /* Inform SSA about this new variable. */
|
||
+ create_var_ann (info->new_var);
|
||
+ mark_sym_for_renaming (info->new_var);
|
||
+ /* We need to make sure we rebuild bits for the original variable,
|
||
+ such as virtual operands attached to statements. */
|
||
+ mark_sym_for_renaming (info->orig_var);
|
||
+ add_referenced_var (info->new_var);
|
||
+
|
||
+ /* Always continue scanning. */
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+#undef NEW_VAR_PREFIX
|
||
+
|
||
+/* Traverse the 'defuse_statements' hash table. For every use,
|
||
+ determine if the associated variable is defined along all paths
|
||
+ leading to said use. Remove the associated variable from
|
||
+ 'static_variables' if it is not. */
|
||
+
|
||
+static int
|
||
+check_definedness (void **slot, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ struct rls_stmt_info *info = (struct rls_stmt_info *) *slot;
|
||
+ struct rls_decl_info dummy;
|
||
+
|
||
+ /* We don't need to look at definitions. Continue scanning. */
|
||
+ if (!info->use_p)
|
||
+ return 1;
|
||
+
|
||
+ dummy.orig_var = info->var;
|
||
+ slot = htab_find_slot (static_variables, &dummy, INSERT);
|
||
+
|
||
+ /* Might not be there because we deleted it already. */
|
||
+ if (*slot)
|
||
+ {
|
||
+ struct rls_decl_info *decl = (struct rls_decl_info *) *slot;
|
||
+
|
||
+ if (!TEST_BIT (info->defined, decl->index))
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "not optimizing ");
|
||
+ print_generic_expr (dump_file, decl->orig_var, 0);
|
||
+ fprintf (dump_file, "due to uncovered use in ");
|
||
+ print_gimple_stmt (dump_file, info->stmt, 0, 0);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+
|
||
+ htab_clear_slot (static_variables, slot);
|
||
+ stats.n_optimizable--;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Continue scan. */
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+/* Check all statements in 'defuse_statements' to see if all the
|
||
+ statements that use a static variable have that variable defined
|
||
+ along all paths leading to the statement. Once that's done, go
|
||
+ through and create new, non-static variables for any static variables
|
||
+ that can be optimized. */
|
||
+
|
||
+static size_t
|
||
+determine_optimizable_statics (void)
|
||
+{
|
||
+ htab_traverse (defuse_statements, check_definedness, NULL);
|
||
+
|
||
+ htab_traverse (static_variables, maybe_create_new_variable, NULL);
|
||
+
|
||
+ return stats.n_optimizable;
|
||
+}
|
||
+
|
||
+/* Look at STMT to see if we have uses or defs of a static variable.
+ STMT is passed in DATA. Definitions of a static variable are found
+ by the presence of a VDEF, while uses are found by the presence
+ of a VUSE. */
|
||
+
|
||
+static int
|
||
+unstaticize_variable (void **slot, void *data)
|
||
+{
|
||
+ struct rls_decl_info *info = (struct rls_decl_info *) *slot;
|
||
+ gimple stmt = (gimple) data;
|
||
+ tree vdef;
|
||
+ tree vuse;
|
||
+ int continue_scan = 1;
|
||
+
|
||
+ /* We should have removed unoptimizable variables during an earlier
|
||
+ traversal. */
|
||
+ gcc_assert (info->optimizable_p);
|
||
+
|
||
+ /* Check for virtual definitions first. */
|
||
+ vdef = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_VDEF);
|
||
+
|
||
+ if (vdef != NULL
|
||
+ && ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF)
|
||
+ && gimple_code (stmt) == GIMPLE_ASSIGN
|
||
+ && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL
|
||
+ && gimple_assign_lhs(stmt) == info->orig_var)
|
||
+ {
|
||
+ /* Make the statement define the new name. The new name has
|
||
+ already been marked for renaming, so no need to do that
|
||
+ here. */
|
||
+ gimple_assign_set_lhs (stmt, info->new_var);
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "found virtual definition!\n");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_DETAILS);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+ continue_scan = 0;
|
||
+ goto done;
|
||
+ }
|
||
+
|
||
+ /* Check for virtual uses. */
|
||
+ vuse = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_VUSE);
|
||
+
|
||
+ if (vuse != NULL
|
||
+ && gimple_code (stmt) == GIMPLE_ASSIGN
|
||
+ && gimple_assign_rhs_code (stmt) == VAR_DECL
|
||
+ && gimple_assign_rhs1 (stmt) == info->orig_var)
|
||
+ {
|
||
+ /* Make the statement use the new name. */
|
||
+ gimple_assign_set_rhs1 (stmt, info->new_var);
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "found virtual use!\n");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_DETAILS);
|
||
+ fprintf (dump_file, "\n");
|
||
+ }
|
||
+ continue_scan = 0;
|
||
+ goto done;
|
||
+ }
|
||
+
|
||
+ done:
|
||
+ if (!continue_scan)
|
||
+ {
|
||
+ /* None of the other optimizable static variables can occur
|
||
+ in this statement. Stop the scan. */
|
||
+ update_stmt (stmt);
|
||
+
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "updated stmt\n");
|
||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_DETAILS);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return continue_scan;
|
||
+}
|
||
+
|
||
+/* Determine if we have any static variables we can optimize. If so,
|
||
+ replace any defs or uses of those variables in their defining/using
|
||
+ statements. */
|
||
+
|
||
+static void
|
||
+maybe_remove_static_from_declarations (void)
|
||
+{
|
||
+ size_t n_optimizable = determine_optimizable_statics ();
|
||
+ basic_block bb;
|
||
+
|
||
+ if (n_optimizable)
|
||
+ /* Replace any optimizable variables with new, non-static variables. */
|
||
+ FOR_EACH_BB (bb)
|
||
+ {
|
||
+ gimple_stmt_iterator gsi;
|
||
+
|
||
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
+ {
|
||
+ gimple stmt = gsi_stmt (gsi);
|
||
+
|
||
+ htab_traverse (static_variables, unstaticize_variable, stmt);
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Callback for htab_traverse to initialize the bitmap for *SLOT, which
|
||
+ is a 'struct rls_stmt_info'. */
|
||
+
|
||
+static int
|
||
+initialize_statement_dataflow (void **slot, void *data ATTRIBUTE_UNUSED)
|
||
+{
|
||
+ struct rls_stmt_info *info = (struct rls_stmt_info *) *slot;
|
||
+
|
||
+ gcc_assert (!info->defined);
|
||
+
|
||
+ if (info->use_p)
|
||
+ {
|
||
+ info->defined = sbitmap_alloc (stats.n_statics);
|
||
+ /* Assume defined along all paths until otherwise informed. */
|
||
+ sbitmap_ones (info->defined);
|
||
+ }
|
||
+
|
||
+ /* Continue traversal. */
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+/* We have N_STATICS static variables to consider. Go through all the
|
||
+ blocks and all the use statements to initialize their bitmaps. */
|
||
+
|
||
+static void
|
||
+initialize_block_and_statement_dataflow (size_t n_statics)
|
||
+{
|
||
+ basic_block bb;
|
||
+
|
||
+ FOR_ALL_BB (bb)
|
||
+ {
|
||
+ struct rls_block_dataflow_data *data
|
||
+ = (struct rls_block_dataflow_data *) bb->aux;
|
||
+
|
||
+ gcc_assert (data);
|
||
+
|
||
+ data->defined_in = sbitmap_alloc (n_statics);
|
||
+ sbitmap_zero (data->defined_in);
|
||
+ data->defined_out = sbitmap_alloc (n_statics);
|
||
+ sbitmap_zero (data->defined_out);
|
||
+ }
|
||
+
|
||
+ htab_traverse (defuse_statements, initialize_statement_dataflow, NULL);
|
||
+}
|
||
+
|
||
+/* Apply the individual effects of the stmts in BB to update the
|
||
+ dataflow analysis information for BB. */
|
||
+
|
||
+static void
|
||
+compute_definedness_for_block (basic_block bb)
|
||
+{
|
||
+ bool changed_p = false;
|
||
+ struct rls_block_dataflow_data *data
|
||
+ = (struct rls_block_dataflow_data *) bb->aux;
|
||
+ gimple_stmt_iterator gsi;
|
||
+
|
||
+ sbitmap_copy (data->defined_out, data->defined_in);
|
||
+
|
||
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
+ {
|
||
+ gimple stmt = gsi_stmt (gsi);
|
||
+
|
||
+ if (gimple_code (stmt) == GIMPLE_CALL)
|
||
+ /* If there's a call expression in STMT, then previous passes
|
||
+ will have determined if the call transitively defines some
|
||
+ static variable. However, we need more precise
|
||
+ information--we need to know whether static variables are
|
||
+ live out after the call. In the absence of such information,
|
||
+ simply declare that all static variables are clobbered by the
|
||
+ call. A better analysis would be interprocedural and compute
|
||
+ the liveness information we require, but for now, we're being
|
||
+ pessimistic. */
|
||
+ sbitmap_zero (data->defined_out);
|
||
+ else
|
||
+ {
|
||
+ struct rls_stmt_info dummy;
|
||
+ void **slot;
|
||
+
|
||
+ /* See if this statement uses or defines a static variable. */
|
||
+ dummy.stmt = stmt;
|
||
+ slot = htab_find_slot (defuse_statements, &dummy, INSERT);
|
||
+
|
||
+ /* Check for uses. */
|
||
+ if (*slot != NULL)
|
||
+ {
|
||
+ struct rls_stmt_info *info = (struct rls_stmt_info *) *slot;
|
||
+
|
||
+ if (info->use_p)
|
||
+ {
|
||
+ gcc_assert (info->defined);
|
||
+
|
||
+ /* Found a statement that uses a function-local static
|
||
+ variable. Copy the current state of definedness. */
|
||
+ sbitmap_copy (info->defined, data->defined_out);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ struct rls_decl_info dummy;
|
||
+ struct rls_decl_info *decl;
|
||
+
|
||
+ gcc_assert (!info->defined);
|
||
+
|
||
+ /* Found a statement that defines a function-local static
|
||
+ variable. Look up the associated variable's information
|
||
+ and mark it as defined in the block. */
|
||
+ dummy.orig_var = info->var;
|
||
+ slot = htab_find_slot (static_variables, &dummy, INSERT);
|
||
+
|
||
+ gcc_assert (*slot);
|
||
+
|
||
+ decl = (struct rls_decl_info *) *slot;
|
||
+
|
||
+ SET_BIT (data->defined_out, decl->index);
|
||
+ changed_p |= true;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Solve the dataflow equations:
|
||
+
|
||
+ DEFINED_IN(b) = intersect DEFINED_OUT(p) for p in preds(b)
|
||
+ DEFINED_OUT(b) = VARIABLES_DEFINED (b, DEFINED_IN (b))
|
||
+
|
||
+ via a simple iterative solver. VARIABLES_DEFINED is computed by
|
||
+ 'compute_definedness_for_block'. */
|
||
+
|
||
+static void
|
||
+compute_definedness (void)
|
||
+{
|
||
+ basic_block bb;
|
||
+ bool changed_p;
|
||
+ sbitmap tmp_bitmap = sbitmap_alloc (stats.n_statics);
|
||
+
|
||
+ /* Compute initial sets. */
|
||
+ FOR_EACH_BB (bb)
|
||
+ {
|
||
+ compute_definedness_for_block (bb);
|
||
+ }
|
||
+
|
||
+ /* Iterate. */
|
||
+ do {
|
||
+ changed_p = false;
|
||
+
|
||
+ FOR_EACH_BB (bb)
|
||
+ {
|
||
+ edge e;
|
||
+ edge_iterator ei;
|
||
+ struct rls_block_dataflow_data *data
|
||
+ = (struct rls_block_dataflow_data *) bb->aux;
|
||
+ bool bitmap_changed_p = false;
|
||
+
|
||
+ sbitmap_ones (tmp_bitmap);
|
||
+
|
||
+ gcc_assert (data);
|
||
+
|
||
+ /* We require information about whether a variable was defined
|
||
+ over all paths leading to a particular use. Therefore, we
|
||
+ intersect the DEFINED sets of all predecessors. */
|
||
+ FOR_EACH_EDGE (e, ei, bb->preds)
|
||
+ {
|
||
+ struct rls_block_dataflow_data *pred_data
|
||
+ = (struct rls_block_dataflow_data *) e->src->aux;
|
||
+
|
||
+ gcc_assert (pred_data);
|
||
+
|
||
+ sbitmap_a_and_b (tmp_bitmap, tmp_bitmap, pred_data->defined_out);
|
||
+ }
|
||
+
|
||
+ bitmap_changed_p = !sbitmap_equal (tmp_bitmap, data->defined_in);
|
||
+
|
||
+ if (bitmap_changed_p)
|
||
+ {
|
||
+ sbitmap_copy (data->defined_in, tmp_bitmap);
|
||
+ compute_definedness_for_block (bb);
|
||
+ }
|
||
+
|
||
+ changed_p |= bitmap_changed_p;
|
||
+ }
|
||
+ } while (changed_p);
|
||
+
|
||
+ sbitmap_free (tmp_bitmap);
|
||
+}
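
As an illustrative aside (not part of the patch), the reason for intersecting the predecessors' DEFINED_OUT sets is that a use is only safe when the variable is defined along every path that reaches it. The standalone sketch below runs the same kind of fixed-point iteration on a four-block diamond CFG, with one bit per static variable:

/* Illustrative sketch only; it mirrors the intersection dataflow above
   on a diamond CFG: entry (0) branches to 1 and 2, which join in 3.
   Bit k of a mask means "static variable k is surely defined here".  */
#include <stdio.h>

int
main (void)
{
  unsigned gen[4] = { 0x0, 0x1, 0x0, 0x0 };  /* only block 1 defines var 0 */
  unsigned out[4] = { 0, 0, 0, 0 };
  int b, changed = 1;

  while (changed)
    {
      changed = 0;
      for (b = 0; b < 4; b++)
        {
          unsigned in, new_out;

          if (b == 0)
            in = 0;                  /* nothing is defined at the entry */
          else if (b == 3)
            in = out[1] & out[2];    /* meet over predecessors: intersection */
          else
            in = out[0];             /* blocks 1 and 2 have one predecessor */
          new_out = in | gen[b];
          if (new_out != out[b])
            {
              out[b] = new_out;
              changed = 1;
            }
        }
    }

  /* Variable 0 is defined only on the path through block 1, so the
     intersection clears its bit at the join: a use in block 3 would
     make the variable non-optimizable.  */
  printf ("defined at join: %#x\n", out[3]);   /* prints 0 */
  return 0;
}
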
|
||
+
|
||
+static unsigned int
|
||
+execute_rls (void)
|
||
+{
|
||
+ rls_init ();
|
||
+
|
||
+ find_static_nonvolatile_declarations ();
|
||
+
|
||
+ /* Can we optimize anything? */
|
||
+ if (stats.n_statics != 0)
|
||
+ {
|
||
+ stats.n_optimizable = stats.n_statics;
|
||
+
|
||
+ if (dump_file)
|
||
+ fprintf (dump_file, "found %d static variables to consider\n",
|
||
+ stats.n_statics);
|
||
+
|
||
+ initialize_block_and_statement_dataflow (stats.n_statics);
|
||
+
|
||
+ compute_definedness ();
|
||
+
|
||
+ maybe_remove_static_from_declarations ();
|
||
+
|
||
+ if (dump_file)
|
||
+ fprintf (dump_file, "removed %d static variables\n",
|
||
+ stats.n_optimizable);
|
||
+ }
|
||
+
|
||
+ rls_done ();
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+static bool
|
||
+gate_rls (void)
|
||
+{
|
||
+ return (flag_remove_local_statics != 0
|
||
+ && !cfun->calls_setjmp
|
||
+ && !cgraph_node (current_function_decl)->ever_was_nested);
|
||
+}
|
||
+
|
||
+struct gimple_opt_pass pass_remove_local_statics =
|
||
+{
|
||
+ {
|
||
+ GIMPLE_PASS,
|
||
+ "remlocstatic", /* name */
|
||
+ gate_rls, /* gate */
|
||
+ execute_rls, /* execute */
|
||
+ NULL, /* sub */
|
||
+ NULL, /* next */
|
||
+ 0, /* static_pass_number */
|
||
+ TV_TREE_RLS, /* tv_id */
|
||
+ PROP_cfg | PROP_ssa, /* properties_required */
|
||
+ 0, /* properties_provided */
|
||
+ 0, /* properties_destroyed */
|
||
+ 0, /* todo_flags_start */
|
||
+ TODO_dump_func | TODO_verify_ssa | TODO_verify_stmts
|
||
+ | TODO_rebuild_alias | TODO_update_ssa /* todo_flags_finish */
|
||
+ }
|
||
+};
|
||
--- a/gcc/tree-ssa-sink.c
|
||
+++ b/gcc/tree-ssa-sink.c
|
||
@@ -449,6 +449,47 @@
|
||
last = false;
|
||
continue;
|
||
}
|
||
+
|
||
+ /* We cannot move statements that contain references to block-scope
|
||
+ variables out of that block, as this may lead to incorrect aliasing
|
||
+ when we lay out the stack frame in cfgexpand.c.
|
||
+ In lieu of more sophisticated analysis, be very conservative here
|
||
+ and prohibit moving any statement that references memory out of a
|
||
+ block with variables. */
|
||
+ if (gimple_references_memory_p (stmt))
|
||
+ {
|
||
+ tree fromblock = gimple_block (stmt);
|
||
+ while (fromblock
|
||
+ && fromblock != current_function_decl
|
||
+ && !BLOCK_VARS (fromblock))
|
||
+ fromblock = BLOCK_SUPERCONTEXT (fromblock);
|
||
+ if (fromblock && fromblock != current_function_decl)
|
||
+ {
|
||
+ gimple tostmt;
|
||
+ tree toblock;
|
||
+
|
||
+ if (gsi_end_p (togsi))
|
||
+ tostmt = gimple_seq_last_stmt (gsi_seq (togsi));
|
||
+ else
|
||
+ tostmt = gsi_stmt (togsi);
|
||
+ if (tostmt)
|
||
+ toblock = gimple_block (tostmt);
|
||
+ else
|
||
+ toblock = NULL;
|
||
+ while (toblock
|
||
+ && toblock != current_function_decl
|
||
+ && toblock != fromblock)
|
||
+ toblock = BLOCK_SUPERCONTEXT (toblock);
|
||
+ if (!toblock || toblock != fromblock)
|
||
+ {
|
||
+ if (!gsi_end_p (gsi))
|
||
+ gsi_prev (&gsi);
|
||
+ last = false;
|
||
+ continue;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
if (dump_file)
|
||
{
|
||
fprintf (dump_file, "Sinking ");
|
||
--- a/gcc/tree-ssa-structalias.c
|
||
+++ b/gcc/tree-ssa-structalias.c
|
||
@@ -2926,7 +2926,8 @@
|
||
/* Some people like to do cute things like take the address of
|
||
&0->a.b */
|
||
forzero = t;
|
||
- while (!SSA_VAR_P (forzero) && !CONSTANT_CLASS_P (forzero))
|
||
+ while (!SSA_VAR_P (forzero) && TREE_CODE (forzero) != FUNCTION_DECL
|
||
+ && !CONSTANT_CLASS_P (forzero))
|
||
forzero = TREE_OPERAND (forzero, 0);
|
||
|
||
if (CONSTANT_CLASS_P (forzero) && integer_zerop (forzero))
|
||
--- a/gcc/tree-vect-analyze.c
|
||
+++ b/gcc/tree-vect-analyze.c
|
||
@@ -1459,7 +1459,7 @@
|
||
}
|
||
|
||
base = build_fold_indirect_ref (base_addr);
|
||
- alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
|
||
+ alignment = ssize_int (targetm.vectorize.vector_min_alignment (vectype));
|
||
|
||
if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
|
||
|| !misalign)
|
||
@@ -1510,7 +1510,8 @@
|
||
/* At this point we assume that the base is aligned. */
|
||
gcc_assert (base_aligned
|
||
|| (TREE_CODE (base) == VAR_DECL
|
||
- && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
|
||
+ && (DECL_ALIGN (base)
|
||
+ >= targetm.vectorize.vector_min_alignment (vectype))));
|
||
|
||
/* Modulo alignment. */
|
||
misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
|
||
--- a/gcc/tree-vectorizer.c
|
||
+++ b/gcc/tree-vectorizer.c
|
||
@@ -2868,11 +2868,13 @@
|
||
vnode = vnode->next_needed)
|
||
{
|
||
tree vectype, decl = vnode->decl;
|
||
+ tree t;
|
||
unsigned int alignment;
|
||
|
||
- if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
|
||
+ t = TREE_TYPE(decl);
|
||
+ if (TREE_CODE (t) != ARRAY_TYPE)
|
||
continue;
|
||
- vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
|
||
+ vectype = get_vectype_for_scalar_type (strip_array_types (t));
|
||
if (!vectype)
|
||
continue;
|
||
alignment = TYPE_ALIGN (vectype);
|
||
@@ -2887,6 +2889,7 @@
|
||
{
|
||
fprintf (dump_file, "Increasing alignment of decl: ");
|
||
print_generic_expr (dump_file, decl, TDF_SLIM);
|
||
+ fprintf (dump_file, "\n");
|
||
}
|
||
}
|
||
}
|
||
--- a/gcc/tree-vect-transform.c
|
||
+++ b/gcc/tree-vect-transform.c
|
||
@@ -5442,7 +5442,21 @@
|
||
vect_permute_store_chain(). */
|
||
vec_oprnd = VEC_index (tree, result_chain, i);
|
||
|
||
- data_ref = build_fold_indirect_ref (dataref_ptr);
|
||
+ if (alignment_support_scheme == dr_aligned
|
||
+ && !targetm.vectorize.always_misalign(vectype))
|
||
+ {
|
||
+ data_ref = build_fold_indirect_ref (dataref_ptr);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* TODO: Record actual alignment in always_misalign case. */
|
||
+ int mis = DR_MISALIGNMENT (first_dr);
|
||
+ tree tmis;
|
||
+ tmis = (mis == -1 ? size_zero_node : size_int (mis));
|
||
+ tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
|
||
+ data_ref =
|
||
+ build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
|
||
+ }
|
||
|
||
/* Arguments are ready. Create the new vector stmt. */
|
||
new_stmt = gimple_build_assign (data_ref, vec_oprnd);
|
||
@@ -6621,10 +6635,15 @@
|
||
{
|
||
case dr_aligned:
|
||
gcc_assert (aligned_access_p (first_dr));
|
||
- data_ref = build_fold_indirect_ref (dataref_ptr);
|
||
- break;
|
||
+ if (!targetm.vectorize.always_misalign(vectype))
|
||
+ {
|
||
+ data_ref = build_fold_indirect_ref (dataref_ptr);
|
||
+ break;
|
||
+ }
|
||
+ /* Fall through... */
|
||
case dr_unaligned_supported:
|
||
{
|
||
+ /* TODO: Record actual alignment in always_misalign case. */
|
||
int mis = DR_MISALIGNMENT (first_dr);
|
||
tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
|
||
|
||
@@ -7595,7 +7614,7 @@
|
||
gimple dr_stmt = DR_STMT (dr);
|
||
stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
|
||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||
- int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
|
||
+ int vectype_align = targetm.vectorize.vector_min_alignment (vectype);
|
||
tree niters_type = TREE_TYPE (loop_niters);
|
||
int step = 1;
|
||
int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
|
||
--- a/gcc/unwind-dw2.c
|
||
+++ b/gcc/unwind-dw2.c
|
||
@@ -1414,16 +1414,12 @@
|
||
/* Fill in CONTEXT for top-of-stack. The only valid registers at this
|
||
level will be the return address and the CFA. */
|
||
|
||
-#define uw_init_context(CONTEXT) \
|
||
- do \
|
||
- { \
|
||
- /* Do any necessary initialization to access arbitrary stack frames. \
|
||
- On the SPARC, this means flushing the register windows. */ \
|
||
- __builtin_unwind_init (); \
|
||
- uw_init_context_1 (CONTEXT, __builtin_dwarf_cfa (), \
|
||
- __builtin_return_address (0)); \
|
||
- } \
|
||
- while (0)
|
||
+#define uw_init_context(CONTEXT) \
|
||
+ /* Do any necessary initialization to access arbitrary stack frames. \
|
||
+ On the SPARC, this means flushing the register windows. */ \
|
||
+ (__builtin_unwind_init (), \
|
||
+ uw_init_context_1 ((CONTEXT), __builtin_dwarf_cfa (), \
|
||
+ __builtin_return_address (0)))
|
||
|
||
static inline void
|
||
init_dwarf_reg_size_table (void)
|
||
@@ -1431,7 +1427,7 @@
|
||
__builtin_init_dwarf_reg_size_table (dwarf_reg_size_table);
|
||
}
|
||
|
||
-static void
|
||
+static _Unwind_Reason_Code
|
||
uw_init_context_1 (struct _Unwind_Context *context,
|
||
void *outer_cfa, void *outer_ra)
|
||
{
|
||
@@ -1445,7 +1441,8 @@
|
||
context->flags = EXTENDED_CONTEXT_BIT;
|
||
|
||
code = uw_frame_state_for (context, &fs);
|
||
- gcc_assert (code == _URC_NO_REASON);
|
||
+ if (code != _URC_NO_REASON)
|
||
+ return code;
|
||
|
||
#if __GTHREADS
|
||
{
|
||
@@ -1471,6 +1468,8 @@
|
||
initialization context, then we can't see it in the given
|
||
call frame data. So have the initialization context tell us. */
|
||
context->ra = __builtin_extract_return_addr (outer_ra);
|
||
+
|
||
+ return _URC_NO_REASON;
|
||
}
|
||
|
||
|
||
--- a/gcc/unwind.inc
|
||
+++ b/gcc/unwind.inc
|
||
@@ -85,7 +85,8 @@
|
||
_Unwind_Reason_Code code;
|
||
|
||
/* Set up this_context to describe the current stack frame. */
|
||
- uw_init_context (&this_context);
|
||
+ code = uw_init_context (&this_context);
|
||
+ gcc_assert (code == _URC_NO_REASON);
|
||
cur_context = this_context;
|
||
|
||
/* Phase 1: Search. Unwind the stack, calling the personality routine
|
||
@@ -198,7 +199,8 @@
|
||
struct _Unwind_Context this_context, cur_context;
|
||
_Unwind_Reason_Code code;
|
||
|
||
- uw_init_context (&this_context);
|
||
+ code = uw_init_context (&this_context);
|
||
+ gcc_assert (code == _URC_NO_REASON);
|
||
cur_context = this_context;
|
||
|
||
exc->private_1 = (_Unwind_Ptr) stop;
|
||
@@ -221,7 +223,8 @@
|
||
struct _Unwind_Context this_context, cur_context;
|
||
_Unwind_Reason_Code code;
|
||
|
||
- uw_init_context (&this_context);
|
||
+ code = uw_init_context (&this_context);
|
||
+ gcc_assert (code == _URC_NO_REASON);
|
||
cur_context = this_context;
|
||
|
||
/* Choose between continuing to process _Unwind_RaiseException
|
||
@@ -251,7 +254,8 @@
|
||
if (exc->private_1 == 0)
|
||
return _Unwind_RaiseException (exc);
|
||
|
||
- uw_init_context (&this_context);
|
||
+ code = uw_init_context (&this_context);
|
||
+ gcc_assert (code == _URC_NO_REASON);
|
||
cur_context = this_context;
|
||
|
||
code = _Unwind_ForcedUnwind_Phase2 (exc, &cur_context);
|
||
@@ -280,7 +284,9 @@
|
||
struct _Unwind_Context context;
|
||
_Unwind_Reason_Code code;
|
||
|
||
- uw_init_context (&context);
|
||
+ code = uw_init_context (&context);
|
||
+ if (code != _URC_NO_REASON)
|
||
+ return _URC_FATAL_PHASE1_ERROR;
|
||
|
||
while (1)
|
||
{
|
||
--- a/gcc/unwind-sjlj.c
|
||
+++ b/gcc/unwind-sjlj.c
|
||
@@ -292,10 +292,11 @@
|
||
uw_update_context (context, fs);
|
||
}
|
||
|
||
-static inline void
|
||
+static inline _Unwind_Reason_Code
|
||
uw_init_context (struct _Unwind_Context *context)
|
||
{
|
||
context->fc = _Unwind_SjLj_GetContext ();
|
||
+ return _URC_NO_REASON;
|
||
}
|
||
|
||
static void __attribute__((noreturn))
|
||
--- a/gcc/varasm.c
|
||
+++ b/gcc/varasm.c
|
||
@@ -1126,11 +1126,14 @@
|
||
{
|
||
#ifdef DATA_ALIGNMENT
|
||
unsigned int data_align = DATA_ALIGNMENT (TREE_TYPE (decl), align);
|
||
+#else
|
||
+ unsigned int data_align = align;
|
||
+#endif
|
||
+ data_align = alignment_for_aligned_arrays (TREE_TYPE (decl), data_align);
|
||
/* Don't increase alignment too much for TLS variables - TLS space
|
||
is too precious. */
|
||
if (! DECL_THREAD_LOCAL_P (decl) || data_align <= BITS_PER_WORD)
|
||
align = data_align;
|
||
-#endif
|
||
#ifdef CONSTANT_ALIGNMENT
|
||
if (DECL_INITIAL (decl) != 0 && DECL_INITIAL (decl) != error_mark_node)
|
||
{
|
||
@@ -3196,6 +3199,10 @@
|
||
set_mem_alias_set (rtl, 0);
|
||
set_mem_alias_set (rtl, const_alias_set);
|
||
|
||
+ /* We cannot share RTX'es in pool entries.
|
||
+ Mark this piece of RTL as required for unsharing. */
|
||
+ RTX_FLAG (rtl, used) = 1;
|
||
+
|
||
/* Set flags or add text to the name to record information, such as
|
||
that it is a local symbol. If the name is changed, the macro
|
||
ASM_OUTPUT_LABELREF will have to know how to strip this
|
||
--- a/gcc/vmsdbgout.c
|
||
+++ b/gcc/vmsdbgout.c
|
||
@@ -211,6 +211,7 @@
|
||
debug_nothing_int, /* handle_pch */
|
||
debug_nothing_rtx, /* var_location */
|
||
debug_nothing_void, /* switch_text_section */
|
||
+ debug_nothing_tree_tree, /* set_name */
|
||
0 /* start_end_main_source_file */
|
||
};
|
||
|
||
--- a/libcpp/directives.c
|
||
+++ b/libcpp/directives.c
|
||
@@ -2299,13 +2299,6 @@
|
||
run_directive (pfile, type, str, count);
|
||
}
|
||
|
||
-/* The number of errors for a given reader. */
|
||
-unsigned int
|
||
-cpp_errors (cpp_reader *pfile)
|
||
-{
|
||
- return pfile->errors;
|
||
-}
|
||
-
|
||
/* The options structure. */
|
||
cpp_options *
|
||
cpp_get_options (cpp_reader *pfile)
|
||
--- a/libcpp/errors.c
|
||
+++ b/libcpp/errors.c
|
||
@@ -28,171 +28,69 @@
|
||
#include "cpplib.h"
|
||
#include "internal.h"
|
||
|
||
-static void print_location (cpp_reader *, source_location, unsigned int);
|
||
-
|
||
-/* Print the logical file location (LINE, COL) in preparation for a
|
||
- diagnostic. Outputs the #include chain if it has changed. A line
|
||
- of zero suppresses the include stack, and outputs the program name
|
||
- instead. */
|
||
-static void
|
||
-print_location (cpp_reader *pfile, source_location line, unsigned int col)
|
||
-{
|
||
- if (line == 0)
|
||
- fprintf (stderr, "%s: ", progname);
|
||
- else
|
||
- {
|
||
- const struct line_map *map;
|
||
- linenum_type lin;
|
||
-
|
||
- map = linemap_lookup (pfile->line_table, line);
|
||
- linemap_print_containing_files (pfile->line_table, map);
|
||
-
|
||
- lin = SOURCE_LINE (map, line);
|
||
- if (col == 0)
|
||
- {
|
||
- col = SOURCE_COLUMN (map, line);
|
||
- if (col == 0)
|
||
- col = 1;
|
||
- }
|
||
-
|
||
- if (lin == 0)
|
||
- fprintf (stderr, "%s:", map->to_file);
|
||
- else if (CPP_OPTION (pfile, show_column) == 0)
|
||
- fprintf (stderr, "%s:%u:", map->to_file, lin);
|
||
- else
|
||
- fprintf (stderr, "%s:%u:%u:", map->to_file, lin, col);
|
||
-
|
||
- fputc (' ', stderr);
|
||
- }
|
||
-}
|
||
-
|
||
-/* Set up for a diagnostic: print the file and line, bump the error
|
||
- counter, etc. SRC_LOC is the logical line number; zero means to print
|
||
- at the location of the previously lexed token, which tends to be
|
||
- the correct place by default. The column number can be specified either
|
||
- using COLUMN or (if COLUMN==0) extracting SOURCE_COLUMN from SRC_LOC.
|
||
- (This may seem redundant, but is useful when pre-scanning (cleaning) a line,
|
||
- when we haven't yet verified whether the current line_map has a
|
||
- big enough max_column_hint.)
|
||
-
|
||
- Returns 0 if the error has been suppressed. */
|
||
-static int
|
||
-_cpp_begin_message (cpp_reader *pfile, int code,
|
||
- source_location src_loc, unsigned int column)
|
||
-{
|
||
- int level = CPP_DL_EXTRACT (code);
|
||
-
|
||
- switch (level)
|
||
- {
|
||
- case CPP_DL_WARNING:
|
||
- case CPP_DL_PEDWARN:
|
||
- if (cpp_in_system_header (pfile)
|
||
- && ! CPP_OPTION (pfile, warn_system_headers))
|
||
- return 0;
|
||
- /* Fall through. */
|
||
-
|
||
- case CPP_DL_WARNING_SYSHDR:
|
||
- if (CPP_OPTION (pfile, warnings_are_errors)
|
||
- || (level == CPP_DL_PEDWARN && CPP_OPTION (pfile, pedantic_errors)))
|
||
- {
|
||
- if (CPP_OPTION (pfile, inhibit_errors))
|
||
- return 0;
|
||
- level = CPP_DL_ERROR;
|
||
- pfile->errors++;
|
||
- }
|
||
- else if (CPP_OPTION (pfile, inhibit_warnings))
|
||
- return 0;
|
||
- break;
|
||
-
|
||
- case CPP_DL_ERROR:
|
||
- if (CPP_OPTION (pfile, inhibit_errors))
|
||
- return 0;
|
||
- /* ICEs cannot be inhibited. */
|
||
- case CPP_DL_ICE:
|
||
- pfile->errors++;
|
||
- break;
|
||
- }
|
||
-
|
||
- print_location (pfile, src_loc, column);
|
||
- if (CPP_DL_WARNING_P (level))
|
||
- fputs (_("warning: "), stderr);
|
||
- else if (level == CPP_DL_ICE)
|
||
- fputs (_("internal error: "), stderr);
|
||
- else
|
||
- fputs (_("error: "), stderr);
|
||
-
|
||
- return 1;
|
||
-}
|
||
-
|
||
-/* Don't remove the blank before do, as otherwise the exgettext
|
||
- script will mistake this as a function definition */
|
||
-#define v_message(msgid, ap) \
|
||
- do { vfprintf (stderr, _(msgid), ap); putc ('\n', stderr); } while (0)
|
||
-
|
||
-/* Exported interface. */
|
||
-
|
||
/* Print an error at the location of the previously lexed token. */
|
||
-void
|
||
+bool
|
||
cpp_error (cpp_reader * pfile, int level, const char *msgid, ...)
|
||
{
|
||
source_location src_loc;
|
||
va_list ap;
|
||
-
|
||
+ bool ret;
|
||
+
|
||
va_start (ap, msgid);
|
||
|
||
- if (CPP_OPTION (pfile, client_diagnostic))
|
||
- pfile->cb.error (pfile, level, _(msgid), &ap);
|
||
- else
|
||
+ if (CPP_OPTION (pfile, traditional))
|
||
{
|
||
- if (CPP_OPTION (pfile, traditional))
|
||
- {
|
||
- if (pfile->state.in_directive)
|
||
- src_loc = pfile->directive_line;
|
||
- else
|
||
- src_loc = pfile->line_table->highest_line;
|
||
- }
|
||
- /* We don't want to refer to a token before the beginning of the
|
||
- current run -- that is invalid. */
|
||
- else if (pfile->cur_token == pfile->cur_run->base)
|
||
- {
|
||
- if (pfile->cur_run->prev != NULL)
|
||
- src_loc = pfile->cur_run->prev->limit->src_loc;
|
||
- else
|
||
- src_loc = 0;
|
||
- }
|
||
+ if (pfile->state.in_directive)
|
||
+ src_loc = pfile->directive_line;
|
||
else
|
||
- {
|
||
- src_loc = pfile->cur_token[-1].src_loc;
|
||
- }
|
||
-
|
||
- if (_cpp_begin_message (pfile, level, src_loc, 0))
|
||
- v_message (msgid, ap);
|
||
+ src_loc = pfile->line_table->highest_line;
|
||
+ }
|
||
+ /* We don't want to refer to a token before the beginning of the
|
||
+ current run -- that is invalid. */
|
||
+ else if (pfile->cur_token == pfile->cur_run->base)
|
||
+ {
|
||
+ if (pfile->cur_run->prev != NULL)
|
||
+ src_loc = pfile->cur_run->prev->limit->src_loc;
|
||
+ else
|
||
+ src_loc = 0;
|
||
}
|
||
+ else
|
||
+ {
|
||
+ src_loc = pfile->cur_token[-1].src_loc;
|
||
+ }
|
||
+
|
||
+ if (!pfile->cb.error)
|
||
+ abort ();
|
||
+ ret = pfile->cb.error (pfile, level, src_loc, 0, _(msgid), &ap);
|
||
|
||
va_end (ap);
|
||
+ return ret;
|
||
}
|
||
|
||
/* Print an error at a specific location. */
|
||
-void
|
||
+bool
|
||
cpp_error_with_line (cpp_reader *pfile, int level,
|
||
source_location src_loc, unsigned int column,
|
||
const char *msgid, ...)
|
||
{
|
||
va_list ap;
|
||
+ bool ret;
|
||
|
||
va_start (ap, msgid);
|
||
|
||
- if (_cpp_begin_message (pfile, level, src_loc, column))
|
||
- v_message (msgid, ap);
|
||
+ if (!pfile->cb.error)
|
||
+ abort ();
|
||
+ ret = pfile->cb.error (pfile, level, src_loc, column, _(msgid), &ap);
|
||
|
||
va_end (ap);
|
||
+ return ret;
|
||
}
|
||
|
||
-void
|
||
+bool
|
||
cpp_errno (cpp_reader *pfile, int level, const char *msgid)
|
||
{
|
||
if (msgid[0] == '\0')
|
||
msgid = _("stdout");
|
||
|
||
- cpp_error (pfile, level, "%s: %s", msgid, xstrerror (errno));
|
||
+ return cpp_error (pfile, level, "%s: %s", msgid, xstrerror (errno));
|
||
}
|
||
--- a/libcpp/files.c
|
||
+++ b/libcpp/files.c
|
||
@@ -488,7 +488,6 @@
|
||
return file;
|
||
}
|
||
|
||
- open_file_failed (pfile, file, angle_brackets);
|
||
if (invalid_pch)
|
||
{
|
||
cpp_error (pfile, CPP_DL_ERROR,
|
||
@@ -497,6 +496,7 @@
|
||
cpp_error (pfile, CPP_DL_ERROR,
|
||
"use -Winvalid-pch for more information");
|
||
}
|
||
+ open_file_failed (pfile, file, angle_brackets);
|
||
break;
|
||
}
|
||
|
||
@@ -934,15 +934,28 @@
|
||
|
||
errno = file->err_no;
|
||
if (print_dep && CPP_OPTION (pfile, deps.missing_files) && errno == ENOENT)
|
||
- deps_add_dep (pfile->deps, file->name);
|
||
+ {
|
||
+ deps_add_dep (pfile->deps, file->name);
|
||
+ /* If the preprocessor output (other than dependency information) is
|
||
+ being used, we must also flag an error. */
|
||
+ if (CPP_OPTION (pfile, deps.need_preprocessor_output))
|
||
+ cpp_errno (pfile, CPP_DL_FATAL, file->path);
|
||
+ }
|
||
else
|
||
{
|
||
- /* If we are outputting dependencies but not for this file then
|
||
- don't error because we can still produce correct output. */
|
||
- if (CPP_OPTION (pfile, deps.style) && ! print_dep)
|
||
- cpp_errno (pfile, CPP_DL_WARNING, file->path);
|
||
+ /* If we are not outputting dependencies, or if we are and dependencies
|
||
+ were requested for this file, or if preprocessor output is needed
|
||
+ in addition to dependency information, this is an error.
|
||
+
|
||
+ Otherwise (outputting dependencies but not for this file, and not
|
||
+ using the preprocessor output), we can still produce correct output
|
||
+ so it's only a warning. */
|
||
+ if (CPP_OPTION (pfile, deps.style) == DEPS_NONE
|
||
+ || print_dep
|
||
+ || CPP_OPTION (pfile, deps.need_preprocessor_output))
|
||
+ cpp_errno (pfile, CPP_DL_FATAL, file->path);
|
||
else
|
||
- cpp_errno (pfile, CPP_DL_ERROR, file->path);
|
||
+ cpp_errno (pfile, CPP_DL_WARNING, file->path);
|
||
}
|
||
}
|
||
|
||
--- a/libcpp/include/cpplib.h
|
||
+++ b/libcpp/include/cpplib.h
|
||
@@ -302,22 +302,9 @@
|
||
/* Nonzero means print names of header files (-H). */
|
||
unsigned char print_include_names;
|
||
|
||
- /* Nonzero means cpp_pedwarn causes a hard error. */
|
||
- unsigned char pedantic_errors;
|
||
-
|
||
- /* Nonzero means don't print warning messages. */
|
||
- unsigned char inhibit_warnings;
|
||
-
|
||
/* Nonzero means complain about deprecated features. */
|
||
unsigned char warn_deprecated;
|
||
|
||
- /* Nonzero means don't suppress warnings from system headers. */
|
||
- unsigned char warn_system_headers;
|
||
-
|
||
- /* Nonzero means don't print error messages. Has no option to
|
||
- select it, but can be set by a user of cpplib (e.g. fix-header). */
|
||
- unsigned char inhibit_errors;
|
||
-
|
||
/* Nonzero means warn if slash-star appears in a comment. */
|
||
unsigned char warn_comments;
|
||
|
||
@@ -353,9 +340,6 @@
|
||
explicitly undefined. */
|
||
unsigned char warn_builtin_macro_redefined;
|
||
|
||
- /* Nonzero means turn warnings into errors. */
|
||
- unsigned char warnings_are_errors;
|
||
-
|
||
/* Nonzero means we should look for header.gcc files that remap file
|
||
names. */
|
||
unsigned char remap;
|
||
@@ -432,6 +416,10 @@
|
||
|
||
/* If true, no dependency is generated on the main file. */
|
||
bool ignore_main_file;
|
||
+
|
||
+ /* If true, intend to use the preprocessor output (e.g., for compilation)
|
||
+ in addition to the dependency info. */
|
||
+ bool need_preprocessor_output;
|
||
} deps;
|
||
|
||
/* Target-specific features set by the front end or client. */
|
||
@@ -450,9 +438,6 @@
|
||
/* Nonzero means __STDC__ should have the value 0 in system headers. */
|
||
unsigned char stdc_0_in_system_headers;
|
||
|
||
- /* True means error callback should be used for diagnostics. */
|
||
- bool client_diagnostic;
|
||
-
|
||
/* True disables tokenization outside of preprocessing directives. */
|
||
bool directives_only;
|
||
};
|
||
@@ -492,10 +477,11 @@
|
||
be expanded. */
|
||
cpp_hashnode * (*macro_to_expand) (cpp_reader *, const cpp_token *);
|
||
|
||
- /* Called to emit a diagnostic if client_diagnostic option is true.
|
||
- This callback receives the translated message. */
|
||
- void (*error) (cpp_reader *, int, const char *, va_list *)
|
||
- ATTRIBUTE_FPTR_PRINTF(3,0);
|
||
+ /* Called to emit a diagnostic. This callback receives the
|
||
+ translated message. */
|
||
+ bool (*error) (cpp_reader *, int, source_location, unsigned int,
|
||
+ const char *, va_list *)
|
||
+ ATTRIBUTE_FPTR_PRINTF(5,0);
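
With this change every cpplib client has to supply the 'error' callback; the library no longer prints diagnostics itself. The following is a hypothetical minimal hook (the name demo_cpp_error is invented, and a real front end would map the location onto its own diagnostic machinery) showing the new signature in use:

/* Illustrative client code, not part of the patch.  The location and
   column arguments are ignored here for brevity.  */
static bool
demo_cpp_error (cpp_reader *pfile, int level, source_location loc,
                unsigned int column, const char *msg, va_list *ap)
{
  if (level == CPP_DL_WARNING || level == CPP_DL_WARNING_SYSHDR
      || level == CPP_DL_PEDWARN)
    fputs ("warning: ", stderr);
  else if (level == CPP_DL_NOTE)
    fputs ("note: ", stderr);
  else
    fputs ("error: ", stderr);
  vfprintf (stderr, msg, *ap);
  fputc ('\n', stderr);
  /* Returning true tells cpplib the diagnostic was really emitted, so
     that follow-up notes (see the macro.c hunk below) are printed too.  */
  return true;
}

/* ...and when setting up the reader:  */
  cpp_get_callbacks (reader)->error = demo_cpp_error;
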
|
||
|
||
/* Callbacks for when a macro is expanded, or tested (whether
|
||
defined or not at the time) in #ifdef, #ifndef or "defined". */
|
||
@@ -697,19 +683,13 @@
|
||
|
||
/* Call this to finish preprocessing. If you requested dependency
|
||
generation, pass an open stream to write the information to,
|
||
- otherwise NULL. It is your responsibility to close the stream.
|
||
-
|
||
- Returns cpp_errors (pfile). */
|
||
-extern int cpp_finish (cpp_reader *, FILE *deps_stream);
|
||
+ otherwise NULL. It is your responsibility to close the stream. */
|
||
+extern void cpp_finish (cpp_reader *, FILE *deps_stream);
|
||
|
||
/* Call this to release the handle at the end of preprocessing. Any
|
||
- use of the handle after this function returns is invalid. Returns
|
||
- cpp_errors (pfile). */
|
||
+ use of the handle after this function returns is invalid. */
|
||
extern void cpp_destroy (cpp_reader *);
|
||
|
||
-/* Error count. */
|
||
-extern unsigned int cpp_errors (cpp_reader *);
|
||
-
|
||
extern unsigned int cpp_token_len (const cpp_token *);
|
||
extern unsigned char *cpp_token_as_text (cpp_reader *, const cpp_token *);
|
||
extern unsigned char *cpp_spell_token (cpp_reader *, const cpp_token *,
|
||
@@ -835,24 +815,23 @@
|
||
/* An internal consistency check failed. Prints "internal error: ",
|
||
otherwise the same as CPP_DL_ERROR. */
|
||
#define CPP_DL_ICE 0x04
|
||
-/* Extracts a diagnostic level from an int. */
|
||
-#define CPP_DL_EXTRACT(l) (l & 0xf)
|
||
-/* Nonzero if a diagnostic level is one of the warnings. */
|
||
-#define CPP_DL_WARNING_P(l) (CPP_DL_EXTRACT (l) >= CPP_DL_WARNING \
|
||
- && CPP_DL_EXTRACT (l) <= CPP_DL_PEDWARN)
|
||
+/* An informative note following a warning. */
|
||
+#define CPP_DL_NOTE 0x05
|
||
+/* A fatal error. */
|
||
+#define CPP_DL_FATAL 0x06
|
||
|
||
/* Output a diagnostic of some kind. */
|
||
-extern void cpp_error (cpp_reader *, int, const char *msgid, ...)
|
||
+extern bool cpp_error (cpp_reader *, int, const char *msgid, ...)
|
||
ATTRIBUTE_PRINTF_3;
|
||
|
||
/* Output a diagnostic with "MSGID: " preceding the
|
||
error string of errno. No location is printed. */
|
||
-extern void cpp_errno (cpp_reader *, int, const char *msgid);
|
||
+extern bool cpp_errno (cpp_reader *, int, const char *msgid);
|
||
|
||
/* Same as cpp_error, except additionally specifies a position as a
|
||
(translation unit) physical line and physical column. If the line is
|
||
zero, then no location is printed. */
|
||
-extern void cpp_error_with_line (cpp_reader *, int, source_location, unsigned,
|
||
+extern bool cpp_error_with_line (cpp_reader *, int, source_location, unsigned,
|
||
const char *msgid, ...) ATTRIBUTE_PRINTF_5;
|
||
|
||
/* In lex.c */
|
||
--- a/libcpp/include/line-map.h
|
||
+++ b/libcpp/include/line-map.h
|
||
@@ -144,12 +144,6 @@
|
||
extern const struct line_map *linemap_lookup
|
||
(struct line_maps *, source_location);
|
||
|
||
-/* Print the file names and line numbers of the #include commands
|
||
- which led to the map MAP, if any, to stderr. Nothing is output if
|
||
- the most recently listed stack is the same as the current one. */
|
||
-extern void linemap_print_containing_files (struct line_maps *,
|
||
- const struct line_map *);
|
||
-
|
||
/* Converts a map and a source_location to source line. */
|
||
#define SOURCE_LINE(MAP, LOC) \
|
||
((((LOC) - (MAP)->start_location) >> (MAP)->column_bits) + (MAP)->to_line)
|
||
--- a/libcpp/init.c
|
||
+++ b/libcpp/init.c
|
||
@@ -616,12 +616,11 @@
|
||
}
|
||
|
||
/* This is called at the end of preprocessing. It pops the last
|
||
- buffer and writes dependency output, and returns the number of
|
||
- errors.
|
||
+ buffer and writes dependency output.
|
||
|
||
Maybe it should also reset state, such that you could call
|
||
cpp_start_read with a new filename to restart processing. */
|
||
-int
|
||
+void
|
||
cpp_finish (cpp_reader *pfile, FILE *deps_stream)
|
||
{
|
||
/* Warn about unused macros before popping the final buffer. */
|
||
@@ -636,9 +635,8 @@
|
||
while (pfile->buffer)
|
||
_cpp_pop_buffer (pfile);
|
||
|
||
- /* Don't write the deps file if there are errors. */
|
||
if (CPP_OPTION (pfile, deps.style) != DEPS_NONE
|
||
- && deps_stream && pfile->errors == 0)
|
||
+ && deps_stream)
|
||
{
|
||
deps_write (pfile->deps, deps_stream, 72);
|
||
|
||
@@ -649,8 +647,6 @@
|
||
/* Report on headers that could use multiple include guards. */
|
||
if (CPP_OPTION (pfile, print_include_names))
|
||
_cpp_report_missing_guards (pfile);
|
||
-
|
||
- return pfile->errors;
|
||
}
|
||
|
||
static void
|
||
--- a/libcpp/internal.h
|
||
+++ b/libcpp/internal.h
|
||
@@ -388,9 +388,6 @@
|
||
/* Nonzero prevents the lexer from re-using the token runs. */
|
||
unsigned int keep_tokens;
|
||
|
||
- /* Error counter for exit code. */
|
||
- unsigned int errors;
|
||
-
|
||
/* Buffer to hold macro definition string. */
|
||
unsigned char *macro_buffer;
|
||
unsigned int macro_buffer_len;
|
||
--- a/libcpp/line-map.c
|
||
+++ b/libcpp/line-map.c
|
||
@@ -302,45 +302,6 @@
|
||
return &set->maps[mn];
|
||
}
|
||
|
||
-/* Print the file names and line numbers of the #include commands
|
||
- which led to the map MAP, if any, to stderr. Nothing is output if
|
||
- the most recently listed stack is the same as the current one. */
|
||
-
|
||
-void
|
||
-linemap_print_containing_files (struct line_maps *set,
|
||
- const struct line_map *map)
|
||
-{
|
||
- if (MAIN_FILE_P (map) || set->last_listed == map->included_from)
|
||
- return;
|
||
-
|
||
- set->last_listed = map->included_from;
|
||
- map = INCLUDED_FROM (set, map);
|
||
-
|
||
- fprintf (stderr, _("In file included from %s:%u"),
|
||
- map->to_file, LAST_SOURCE_LINE (map));
|
||
-
|
||
- while (! MAIN_FILE_P (map))
|
||
- {
|
||
- map = INCLUDED_FROM (set, map);
|
||
- /* Translators note: this message is used in conjunction
|
||
- with "In file included from %s:%ld" and some other
|
||
- tricks. We want something like this:
|
||
-
|
||
- | In file included from sys/select.h:123,
|
||
- | from sys/types.h:234,
|
||
- | from userfile.c:31:
|
||
- | bits/select.h:45: <error message here>
|
||
-
|
||
- with all the "from"s lined up.
|
||
- The trailing comma is at the beginning of this message,
|
||
- and the trailing colon is not translated. */
|
||
- fprintf (stderr, _(",\n from %s:%u"),
|
||
- map->to_file, LAST_SOURCE_LINE (map));
|
||
- }
|
||
-
|
||
- fputs (":\n", stderr);
|
||
-}
|
||
-
|
||
/* Print an include trace, for e.g. the -H option of the preprocessor. */
|
||
|
||
static void
|
||
--- a/libcpp/macro.c
|
||
+++ b/libcpp/macro.c
|
||
@@ -1833,11 +1833,13 @@
|
||
|
||
if (warn_of_redefinition (pfile, node, macro))
|
||
{
|
||
- cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->directive_line, 0,
|
||
- "\"%s\" redefined", NODE_NAME (node));
|
||
+ bool warned;
|
||
+ warned = cpp_error_with_line (pfile, CPP_DL_PEDWARN,
|
||
+ pfile->directive_line, 0,
|
||
+ "\"%s\" redefined", NODE_NAME (node));
|
||
|
||
- if (node->type == NT_MACRO && !(node->flags & NODE_BUILTIN))
|
||
- cpp_error_with_line (pfile, CPP_DL_PEDWARN,
|
||
+ if (warned && node->type == NT_MACRO && !(node->flags & NODE_BUILTIN))
|
||
+ cpp_error_with_line (pfile, CPP_DL_NOTE,
|
||
node->value.macro->line, 0,
|
||
"this is the location of the previous definition");
|
||
}
|
||
--- /dev/null
+++ b/libgcc/config/arm/t-divmod-ef
@@ -0,0 +1,4 @@
+# On ARM, specifying -fnon-call-exceptions will needlessly pull in
+# the unwinder in simple programs which use 64-bit division.  Omitting
+# the option is safe.
+LIB2_DIVMOD_EXCEPTION_FLAGS := -fexceptions
--- /dev/null
+++ b/libgcc/config/mips/t-crtfm
@@ -0,0 +1,3 @@
+crtfastmath.o: $(gcc_srcdir)/config/mips/crtfastmath.c
+	$(gcc_compile) -c $(gcc_srcdir)/config/mips/crtfastmath.c
+
--- a/libgcc/config/rs6000/t-ppccomm
|
||
+++ b/libgcc/config/rs6000/t-ppccomm
|
||
@@ -101,3 +101,63 @@
|
||
|
||
ncrtn$(objext): ncrtn.S
|
||
$(crt_compile) -c ncrtn.S
|
||
+
|
||
+crtsavres$(objext): crtsavres.S
|
||
+ $(crt_compile) -c crtsavres.S
|
||
+
|
||
+crtsavfpr$(objext): crtsavfpr.S
|
||
+ $(crt_compile) -c crtsavfpr.S
|
||
+
|
||
+crtresfpr$(objext): crtresfpr.S
|
||
+ $(crt_compile) -c crtresfpr.S
|
||
+
|
||
+crtsavgpr$(objext): crtsavgpr.S
|
||
+ $(crt_compile) -c crtsavgpr.S
|
||
+
|
||
+crtresgpr$(objext): crtresgpr.S
|
||
+ $(crt_compile) -c crtresgpr.S
|
||
+
|
||
+crtresxfpr$(objext): crtresxfpr.S
|
||
+ $(crt_compile) -c crtresxfpr.S
|
||
+
|
||
+crtresxgpr$(objext): crtresxgpr.S
|
||
+ $(crt_compile) -c crtresxgpr.S
|
||
+
|
||
+e500crtres32gpr$(objext): e500crtres32gpr.S
|
||
+ $(crt_compile) -c e500crtres32gpr.S
|
||
+
|
||
+e500crtres64gpr$(objext): e500crtres64gpr.S
|
||
+ $(crt_compile) -c e500crtres64gpr.S
|
||
+
|
||
+e500crtres64gprctr$(objext): e500crtres64gprctr.S
|
||
+ $(crt_compile) -c e500crtres64gprctr.S
|
||
+
|
||
+e500crtrest32gpr$(objext): e500crtrest32gpr.S
|
||
+ $(crt_compile) -c e500crtrest32gpr.S
|
||
+
|
||
+e500crtrest64gpr$(objext): e500crtrest64gpr.S
|
||
+ $(crt_compile) -c e500crtrest64gpr.S
|
||
+
|
||
+e500crtresx32gpr$(objext): e500crtresx32gpr.S
|
||
+ $(crt_compile) -c e500crtresx32gpr.S
|
||
+
|
||
+e500crtresx64gpr$(objext): e500crtresx64gpr.S
|
||
+ $(crt_compile) -c e500crtresx64gpr.S
|
||
+
|
||
+e500crtsav32gpr$(objext): e500crtsav32gpr.S
|
||
+ $(crt_compile) -c e500crtsav32gpr.S
|
||
+
|
||
+e500crtsav64gpr$(objext): e500crtsav64gpr.S
|
||
+ $(crt_compile) -c e500crtsav64gpr.S
|
||
+
|
||
+e500crtsav64gprctr$(objext): e500crtsav64gprctr.S
|
||
+ $(crt_compile) -c e500crtsav64gprctr.S
|
||
+
|
||
+e500crtsavg32gpr$(objext): e500crtsavg32gpr.S
|
||
+ $(crt_compile) -c e500crtsavg32gpr.S
|
||
+
|
||
+e500crtsavg64gpr$(objext): e500crtsavg64gpr.S
|
||
+ $(crt_compile) -c e500crtsavg64gpr.S
|
||
+
|
||
+e500crtsavg64gprctr$(objext): e500crtsavg64gprctr.S
|
||
+ $(crt_compile) -c e500crtsavg64gprctr.S
|
||
--- a/libgcc/config.host
|
||
+++ b/libgcc/config.host
|
||
@@ -203,12 +203,15 @@
|
||
arm*-*-netbsd*)
|
||
;;
|
||
arm*-*-linux*) # ARM GNU/Linux with ELF
|
||
+ tmake_file="${tmake_file} arm/t-divmod-ef"
|
||
;;
|
||
arm*-*-uclinux*) # ARM ucLinux
|
||
+ tmake_file="${tmake_file} arm/t-divmod-ef"
|
||
;;
|
||
arm*-*-ecos-elf)
|
||
;;
|
||
arm*-*-eabi* | arm*-*-symbianelf* )
|
||
+ tmake_file="${tmake_file} arm/t-divmod-ef"
|
||
;;
|
||
arm*-*-rtems*)
|
||
;;
|
||
@@ -394,8 +397,12 @@
|
||
mips*-*-netbsd*) # NetBSD/mips, either endian.
|
||
;;
|
||
mips64*-*-linux*)
|
||
+ extra_parts="$extra_parts crtfastmath.o"
|
||
+ tmake_file="{$tmake_file} mips/t-crtfm"
|
||
;;
|
||
mips*-*-linux*) # Linux MIPS, either endian.
|
||
+ extra_parts="$extra_parts crtfastmath.o"
|
||
+ tmake_file="{$tmake_file} mips/t-crtfm"
|
||
;;
|
||
mips*-*-openbsd*)
|
||
;;
|
||
@@ -419,6 +426,10 @@
|
||
;;
|
||
mips64orion-*-elf* | mips64orionel-*-elf*)
|
||
;;
|
||
+mips64octeon-wrs-elf* | mips64octeonel-wrs-elf*)
|
||
+ ;;
|
||
+mips64octeon-montavista-elf*)
|
||
+ ;;
|
||
mips*-*-rtems*)
|
||
;;
|
||
mips-wrs-vxworks)
|
||
--- a/libgcc/Makefile.in
|
||
+++ b/libgcc/Makefile.in
|
||
@@ -389,18 +389,24 @@
|
||
endif
|
||
endif
|
||
|
||
+ifeq ($(LIB2_DIVMOD_EXCEPTION_FLAGS),)
|
||
+# Provide default flags for compiling divmod functions, if they haven't been
|
||
+# set already by a target-specific Makefile fragment.
|
||
+LIB2_DIVMOD_EXCEPTION_FLAGS := -fexceptions -fnon-call-exceptions
|
||
+endif
|
||
+
|
||
# Build LIB2_DIVMOD_FUNCS.
|
||
lib2-divmod-o = $(patsubst %,%$(objext),$(LIB2_DIVMOD_FUNCS))
|
||
$(lib2-divmod-o): %$(objext): $(gcc_srcdir)/libgcc2.c
|
||
$(gcc_compile) -DL$* -c $(gcc_srcdir)/libgcc2.c \
|
||
- -fexceptions -fnon-call-exceptions $(vis_hide)
|
||
+ $(LIB2_DIVMOD_EXCEPTION_FLAGS) $(vis_hide)
|
||
libgcc-objects += $(lib2-divmod-o)
|
||
|
||
ifeq ($(enable_shared),yes)
|
||
lib2-divmod-s-o = $(patsubst %,%_s$(objext),$(LIB2_DIVMOD_FUNCS))
|
||
$(lib2-divmod-s-o): %_s$(objext): $(gcc_srcdir)/libgcc2.c
|
||
$(gcc_s_compile) -DL$* -c $(gcc_srcdir)/libgcc2.c \
|
||
- -fexceptions -fnon-call-exceptions
|
||
+ $(LIB2_DIVMOD_EXCEPTION_FLAGS)
|
||
libgcc-s-objects += $(lib2-divmod-s-o)
|
||
endif
|
||
|
||
--- a/libgcc/shared-object.mk
|
||
+++ b/libgcc/shared-object.mk
|
||
@@ -8,11 +8,13 @@
|
||
|
||
ifeq ($(suffix $o),.c)
|
||
|
||
+c_flags-$(base)$(objext) := $(c_flags)
|
||
$(base)$(objext): $o
|
||
- $(gcc_compile) $(c_flags) -c $< $(vis_hide)
|
||
+ $(gcc_compile) $(c_flags-$@) -c $< $(vis_hide)
|
||
|
||
+c_flags-$(base)_s$(objext) := $(c_flags)
|
||
$(base)_s$(objext): $o
|
||
- $(gcc_s_compile) $(c_flags) -c $<
|
||
+ $(gcc_s_compile) $(c_flags-$@) -c $<
|
||
|
||
else
|
||
|
||
--- a/libgcc/static-object.mk
|
||
+++ b/libgcc/static-object.mk
|
||
@@ -8,8 +8,9 @@
|
||
|
||
ifeq ($(suffix $o),.c)
|
||
|
||
+c_flags-$(base)$(objext) := $(c_flags)
|
||
$(base)$(objext): $o
|
||
- $(gcc_compile) $(c_flags) -c $< $(vis_hide)
|
||
+ $(gcc_compile) $(c_flags-$@) -c $< $(vis_hide)
|
||
|
||
else
|
||
|
||
--- a/libiberty/argv.c
|
||
+++ b/libiberty/argv.c
|
||
@@ -119,6 +119,24 @@
|
||
}
|
||
}
|
||
|
||
+static void
|
||
+consume_whitespace (const char **input)
|
||
+{
|
||
+ while (ISSPACE (**input))
|
||
+ {
|
||
+ (*input)++;
|
||
+ }
|
||
+}
|
||
+
|
||
+static int
|
||
+only_whitespace (const char* input)
|
||
+{
|
||
+ while (*input != EOS && ISSPACE (*input))
|
||
+ input++;
|
||
+
|
||
+ return (*input == EOS);
|
||
+}
|
||
+
|
||
/*
|
||
|
||
@deftypefn Extension char** buildargv (char *@var{sp})
|
||
@@ -179,10 +197,8 @@
|
||
do
|
||
{
|
||
/* Pick off argv[argc] */
|
||
- while (ISBLANK (*input))
|
||
- {
|
||
- input++;
|
||
- }
|
||
+ consume_whitespace (&input);
|
||
+
|
||
if ((maxargc == 0) || (argc >= (maxargc - 1)))
|
||
{
|
||
/* argv needs initialization, or expansion */
|
||
@@ -278,10 +294,7 @@
|
||
argc++;
|
||
argv[argc] = NULL;
|
||
|
||
- while (ISSPACE (*input))
|
||
- {
|
||
- input++;
|
||
- }
|
||
+ consume_whitespace (&input);
|
||
}
|
||
while (*input != EOS);
|
||
}
|
||
@@ -420,8 +433,17 @@
|
||
goto error;
|
||
/* Add a NUL terminator. */
|
||
buffer[len] = '\0';
|
||
- /* Parse the string. */
|
||
- file_argv = buildargv (buffer);
|
||
+ /* If the file is empty or contains only whitespace, buildargv would
|
||
+ return a single empty argument. In this context we want no arguments,
|
||
+ instead. */
|
||
+ if (only_whitespace (buffer))
|
||
+ {
|
||
+ file_argv = (char **) xmalloc (sizeof (char *));
|
||
+ file_argv[0] = NULL;
|
||
+ }
|
||
+ else
|
||
+ /* Parse the string. */
|
||
+ file_argv = buildargv (buffer);
|
||
/* If *ARGVP is not already dynamically allocated, copy it. */
|
||
if (!argv_dynamic)
|
||
{
|
||
@@ -434,7 +456,7 @@
|
||
}
|
||
/* Count the number of arguments. */
|
||
file_argc = 0;
|
||
- while (file_argv[file_argc] && *file_argv[file_argc])
|
||
+ while (file_argv[file_argc])
|
||
++file_argc;
|
||
/* Now, insert FILE_ARGV into ARGV. The "+1" below handles the
|
||
NULL terminator at the end of ARGV. */
|
||
--- a/libstdc++-v3/config/cpu/sh/atomicity.h
|
||
+++ b/libstdc++-v3/config/cpu/sh/atomicity.h
|
||
@@ -25,47 +25,48 @@
|
||
|
||
#ifdef __SH4A__
|
||
|
||
-#ifndef _GLIBCXX_ATOMICITY_H
|
||
-#define _GLIBCXX_ATOMICITY_H 1
|
||
+#include <ext/atomicity.h>
|
||
|
||
-typedef int _Atomic_word;
|
||
+_GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)
|
||
|
||
-static inline _Atomic_word
|
||
-__attribute__ ((__unused__))
|
||
-__exchange_and_add (volatile _Atomic_word* __mem, int __val)
|
||
-{
|
||
- _Atomic_word __result;
|
||
+ typedef int _Atomic_word;
|
||
|
||
- __asm__ __volatile__
|
||
- ("0:\n"
|
||
- "\tmovli.l\t@%2,r0\n"
|
||
- "\tmov\tr0,%1\n"
|
||
- "\tadd\t%3,r0\n"
|
||
- "\tmovco.l\tr0,@%2\n"
|
||
- "\tbf\t0b"
|
||
- : "+m" (*__mem), "=r" (__result)
|
||
- : "r" (__mem), "rI08" (__val)
|
||
- : "r0");
|
||
-
|
||
- return __result;
|
||
-}
|
||
-
|
||
-
|
||
-static inline void
|
||
-__attribute__ ((__unused__))
|
||
-__atomic_add (volatile _Atomic_word* __mem, int __val)
|
||
-{
|
||
- asm("0:\n"
|
||
- "\tmovli.l\t@%1,r0\n"
|
||
- "\tadd\t%2,r0\n"
|
||
- "\tmovco.l\tr0,@%1\n"
|
||
- "\tbf\t0b"
|
||
- : "+m" (*__mem)
|
||
- : "r" (__mem), "rI08" (__val)
|
||
- : "r0");
|
||
-}
|
||
+ _Atomic_word
|
||
+ __attribute__ ((__unused__))
|
||
+ __exchange_and_add (volatile _Atomic_word* __mem, int __val)
|
||
+ {
|
||
+ _Atomic_word __result;
|
||
|
||
-#endif
|
||
+ __asm__ __volatile__
|
||
+ ("0:\n"
|
||
+ "\tmovli.l\t@%2,r0\n"
|
||
+ "\tmov\tr0,%1\n"
|
||
+ "\tadd\t%3,r0\n"
|
||
+ "\tmovco.l\tr0,@%2\n"
|
||
+ "\tbf\t0b"
|
||
+ : "+m" (*__mem), "=&r" (__result)
|
||
+ : "r" (__mem), "rI08" (__val)
|
||
+ : "r0");
|
||
+
|
||
+ return __result;
|
||
+ }
|
||
+
|
||
+
|
||
+ void
|
||
+ __attribute__ ((__unused__))
|
||
+ __atomic_add (volatile _Atomic_word* __mem, int __val)
|
||
+ {
|
||
+ asm("0:\n"
|
||
+ "\tmovli.l\t@%1,r0\n"
|
||
+ "\tadd\t%2,r0\n"
|
||
+ "\tmovco.l\tr0,@%1\n"
|
||
+ "\tbf\t0b"
|
||
+ : "+m" (*__mem)
|
||
+ : "r" (__mem), "rI08" (__val)
|
||
+ : "r0");
|
||
+ }
|
||
+
|
||
+_GLIBCXX_END_NAMESPACE
|
||
|
||
#else /* !__SH4A__ */
|
||
|
||
--- a/libstdc++-v3/libsupc++/eh_arm.cc
+++ b/libstdc++-v3/libsupc++/eh_arm.cc
@@ -38,7 +38,7 @@
extern "C" __cxa_type_match_result
__cxa_type_match(_Unwind_Exception* ue_header,
const std::type_info* catch_type,
- bool is_reference __attribute__((__unused__)),
+ bool is_reference,
void** thrown_ptr_p)
{
bool forced_unwind = __is_gxx_forced_unwind_class(ue_header->exception_class);
@@ -68,11 +68,11 @@
if (throw_type->__is_pointer_p())
thrown_ptr = *(void**) thrown_ptr;

- if (catch_type->__do_catch(throw_type, &thrown_ptr, 1))
+ if (catch_type->__do_catch (throw_type, &thrown_ptr, 1 + is_reference * 2))
{
*thrown_ptr_p = thrown_ptr;

- if (typeid(*catch_type) == typeid (typeid(void*)))
+ if (typeid (*catch_type) == typeid (typeid(void*)))
{
const __pointer_type_info *catch_pointer_type =
static_cast<const __pointer_type_info *> (catch_type);
--- a/libstdc++-v3/libsupc++/eh_personality.cc
+++ b/libstdc++-v3/libsupc++/eh_personality.cc
@@ -89,20 +89,22 @@
// Return an element from a type table.

static const std::type_info*
-get_ttype_entry(lsda_header_info* info, _uleb128_t i)
+get_ttype_entry(lsda_header_info* info, _uleb128_t i, bool &is_ref)
{
_Unwind_Ptr ptr;

ptr = (_Unwind_Ptr) (info->TType - (i * 4));
ptr = _Unwind_decode_target2(ptr);

- return reinterpret_cast<const std::type_info *>(ptr);
+ is_ref = ptr & 1;
+
+ return reinterpret_cast<const std::type_info *>(ptr & ~1);
}

// The ABI provides a routine for matching exception object types.
typedef _Unwind_Control_Block _throw_typet;
-#define get_adjusted_ptr(catch_type, throw_type, thrown_ptr_p) \
- (__cxa_type_match (throw_type, catch_type, false, thrown_ptr_p) \
+#define get_adjusted_ptr(catch_type, throw_type, is_ref, thrown_ptr_p) \
+ (__cxa_type_match (throw_type, catch_type, is_ref, thrown_ptr_p) \
!= ctm_failed)

// Return true if THROW_TYPE matches one if the filter types.
@@ -118,6 +120,7 @@
{
const std::type_info* catch_type;
_uleb128_t tmp;
+ bool is_ref;

tmp = *e;

@@ -129,13 +132,14 @@
tmp = _Unwind_decode_target2((_Unwind_Word) e);

// Match a ttype entry.
- catch_type = reinterpret_cast<const std::type_info*>(tmp);
+ is_ref = tmp & 1;
+ catch_type = reinterpret_cast<const std::type_info*>(tmp & ~1);

// ??? There is currently no way to ask the RTTI code about the
// relationship between two types without reference to a specific
// object. There should be; then we wouldn't need to mess with
// thrown_ptr here.
- if (get_adjusted_ptr(catch_type, throw_type, &thrown_ptr))
+ if (get_adjusted_ptr(catch_type, throw_type, is_ref, &thrown_ptr))
return true;

// Advance to the next entry.
@@ -207,7 +211,7 @@
// Return an element from a type table.

static const std::type_info *
-get_ttype_entry (lsda_header_info *info, _uleb128_t i)
+get_ttype_entry (lsda_header_info *info, _uleb128_t i, bool &is_ref)
{
_Unwind_Ptr ptr;

@@ -215,7 +219,9 @@
read_encoded_value_with_base (info->ttype_encoding, info->ttype_base,
info->TType - i, &ptr);

- return reinterpret_cast<const std::type_info *>(ptr);
+ is_ref = ptr & 1;
+
+ return reinterpret_cast<const std::type_info *>(ptr & ~1);
}

// Given the thrown type THROW_TYPE, pointer to a variable containing a
@@ -226,6 +232,7 @@
static bool
get_adjusted_ptr (const std::type_info *catch_type,
const std::type_info *throw_type,
+ bool is_ref,
void **thrown_ptr_p)
{
void *thrown_ptr = *thrown_ptr_p;
@@ -237,7 +244,7 @@
if (throw_type->__is_pointer_p ())
thrown_ptr = *(void **) thrown_ptr;

- if (catch_type->__do_catch (throw_type, &thrown_ptr, 1))
+ if (catch_type->__do_catch (throw_type, &thrown_ptr, 1 + is_ref * 2))
{
*thrown_ptr_p = thrown_ptr;
return true;
@@ -267,13 +274,15 @@
return false;

// Match a ttype entry.
- catch_type = get_ttype_entry (info, tmp);
+ bool is_ref;
+
+ catch_type = get_ttype_entry (info, tmp, is_ref);

// ??? There is currently no way to ask the RTTI code about the
// relationship between two types without reference to a specific
// object. There should be; then we wouldn't need to mess with
// thrown_ptr here.
- if (get_adjusted_ptr (catch_type, throw_type, &thrown_ptr))
+ if (get_adjusted_ptr (catch_type, throw_type, is_ref, &thrown_ptr))
return true;
}
}
@@ -582,14 +591,16 @@
else if (ar_filter > 0)
{
// Positive filter values are handlers.
- catch_type = get_ttype_entry (&info, ar_filter);
+ bool is_ref;
+
+ catch_type = get_ttype_entry (&info, ar_filter, is_ref);

// Null catch type is a catch-all handler; we can catch foreign
// exceptions with this. Otherwise we must match types.
if (! catch_type
|| (throw_type
&& get_adjusted_ptr (catch_type, throw_type,
- &thrown_ptr)))
+ is_ref, &thrown_ptr)))
{
saw_handler = true;
break;
--- a/libcpp/Makefile.in
+++ b/libcpp/Makefile.in
@@ -72,13 +72,12 @@
libcpp_a_OBJS = charset.o directives.o directives-only.o errors.o \
expr.o files.o identifiers.o init.o lex.o line-map.o macro.o \
mkdeps.o pch.o symtab.o traditional.o
-makedepend_OBJS = makedepend.o

libcpp_a_SOURCES = charset.c directives.c directives-only.c errors.c \
expr.c files.c identifiers.c init.c lex.c line-map.c macro.c \
mkdeps.c pch.c symtab.c traditional.c

-all: libcpp.a makedepend$(EXEEXT) $(USED_CATALOGS)
+all: libcpp.a $(USED_CATALOGS)

.SUFFIXES:
.SUFFIXES: .c .gmo .o .obj .po .pox
@@ -88,12 +87,6 @@
$(AR) $(ARFLAGS) libcpp.a $(libcpp_a_OBJS)
$(RANLIB) libcpp.a

-makedepend$(EXEEXT): $(makedepend_OBJS) libcpp.a ../libiberty/libiberty.a
- @rm -f makedepend$(EXEEXT)
- $(CC) $(CFLAGS) $(LDFLAGS) -o makedepend$(EXEEXT) \
- $(makedepend_OBJS) libcpp.a ../libiberty/libiberty.a \
- $(LIBINTL) $(LIBICONV)
-
# Rules to rebuild the configuration

Makefile: $(srcdir)/Makefile.in config.status
@@ -165,7 +158,7 @@
-rm -f *.o

clean: mostlyclean
- -rm -rf makedepend$(EXEEXT) libcpp.a $(srcdir)/autom4te.cache
+ -rm -rf libcpp.a $(srcdir)/autom4te.cache

distclean: clean
-rm -f config.h stamp-h1 config.status config.cache config.log \
@@ -247,7 +240,7 @@
sed 's:$(srcdir)/::g' <po/$(PACKAGE).pot.tmp >po/$(PACKAGE).pot
rm po/$(PACKAGE).pot.tmp

-TAGS_SOURCES = $(libcpp_a_SOURCES) makedepend.c internal.h ucnid.h \
+TAGS_SOURCES = $(libcpp_a_SOURCES) internal.h ucnid.h \
include/line-map.h include/symtab.h include/cpp-id-data.h \
include/cpplib.h include/mkdeps.h system.h

@@ -259,7 +252,7 @@
.NOEXPORT:

# Dependencies
--include $(patsubst %.o, $(DEPDIR)/%.Po, $(libcpp_a_OBJS) $(makedepend_OBJS))
+-include $(patsubst %.o, $(DEPDIR)/%.Po, $(libcpp_a_OBJS))

# Dependencies on generated headers have to be explicit.
init.o: localedir.h