diff --git a/gcc/ai-optimizer.cc b/gcc/ai-optimizer.cc
index 9edd935a70a3ac0b0fe69b3853cadaacb3270260..c127599764fb9f0380d8f33c6a8e8674545441f1 100644
--- a/gcc/ai-optimizer.cc
+++ b/gcc/ai-optimizer.cc
@@ -284,14 +284,15 @@ static int
 graph_infer (int argc1, const char **argv1, const char *mops,
              int argc2, int64_t *argv2)
 {
-  char gcc_exec_prefix[512];
+  const int prefix_buff_len = 512;
+  char gcc_exec_prefix[prefix_buff_len] = {0};
   ssize_t len = readlink ("/proc/self/exe", gcc_exec_prefix,
                           sizeof (gcc_exec_prefix) - 1);
   if (len == -1)
     return 0;
 
-  char native_file[512];
-  strncpy (native_file, gcc_exec_prefix, sizeof (native_file) - 1);
+  char native_file[prefix_buff_len] = {0};
+  strncpy (native_file, gcc_exec_prefix, len);
   const char *target = "bin/gcc";
   const char *target_cc1 = "cc1";
   const char *target_gpp = "bin/g++";
@@ -329,6 +330,9 @@ graph_infer (int argc1, const char **argv1, const char *mops,
                    strlen (native_file) - 1);
        }
     }
+  else
+    return 0;
+
   if (access (native_file, F_OK) == 0)
     fill_node (native_file);
   else
@@ -421,4 +425,4 @@ get_optimize_decision_from_optimizer (int argc, const char **argv,
     {
       putenv ("AI_INFER_LEVEL=1");
     }
-}
\ No newline at end of file
+}
diff --git a/gcc/common.opt b/gcc/common.opt
index d4f391e4824df005213ed566c91f185ab7d4ba2f..bdf9b64366466c4de0979c119941ba347cc734d2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1243,6 +1243,10 @@ fcompare-elim
 Common Var(flag_compare_elim_after_reload) Optimization
 Perform comparison elimination after register allocation has finished.
 
+floop-elim
+Common Var(flag_loop_elim) Init(0) Optimization
+Perform redundant loop elimination.
+
 fconserve-stack
 Common Var(flag_conserve_stack) Optimization
 Do not perform optimizations increasing noticeably stack usage.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f308a72ec70be71fef785b247bf48dabbef774e1..432b771012c5c077083b09b72e2ed17756a3648c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6454,6 +6454,22 @@
   [(set_attr "type" "neon_compare, neon_compare_zero")]
 )
 
+;; Use cmlt to replace vector arithmetic operations like this (SImode example):
+;; B = ((A >> 15) & 0x00010001) * 0x0000ffff
+(define_insn "*aarch64_cmlt_as_arith2"
+  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
+        (mult:<V_INT_EQUIV>
+          (and:<V_INT_EQUIV>
+            (lshiftrt:<V_INT_EQUIV>
+              (match_operand:VDQHSD 1 "register_operand" "w")
+              (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
+            (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
+          (match_operand:VDQHSD 4 "half_bit_all_one_operand")))]
+  "TARGET_SIMD && flag_cmlt_arith"
+  "cmlt\t%0.<Vtype>, %1.<Vtype>, #0"
+  [(set_attr "type" "neon_compare_zero")]
+)
+
 ;; Use cmlt to replace vector arithmetic operations like this (SImode example):
 ;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
 ;; TODO: maybe extend to scalar operations or other cm** instructions.
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3e4c2957aa183460e81a53e45e8ff4b0de7dc31a..94d10e5015ff9b795154bf8b5cc739ec744405ea 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1973,7 +1973,7 @@ static const struct tune_params hip12_tunings =
   2, /* min_div_recip_mul_df.  */
   0, /* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags.  */
   &hip12_prefetch_tune
 };
 
@@ -15036,6 +15036,18 @@ cost_plus:
       return true;
 
     case MULT:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+      if (flag_cmlt_arith && GET_CODE (op0) == AND)
+        {
+          rtx op0_subop0 = XEXP (op0, 0);
+          if (GET_CODE (op0_subop0) == LSHIFTRT)
+            {
+              *cost += rtx_cost (op0, mode, MULT, 0, speed);
+              *cost += rtx_cost (op1, mode, MULT, 0, speed);
+              return true;
+            }
+        }
       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
       /* aarch64_rtx_mult_cost always handles recursion to its
          operands.  */
@@ -16853,6 +16865,7 @@ static void
 override_CPP_optimize_options (struct gcc_options *opts)
 {
   opts->x_flag_finite_loops = 1;
+  opts->x_flag_loop_elim = 1;
   opts->x_flag_omit_frame_pointer = 1;
   opts->x_flag_sized_deallocation = 0;
   opts->x_param_early_inlining_insns = 256;
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 07c14aacb03d804fd5194ab1e5dd089e9d0a60db..1603dab88bbba9fb10ac98a30c4cf5ebee500cea 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -74,6 +74,15 @@
   return CONST_INT_P (op) && (UINTVAL (op) == mask);
 })
 
+(define_predicate "half_bit_all_one_operand"
+  (match_code "const_vector")
+{
+  op = unwrap_const_vec_duplicate (op);
+  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
+  unsigned long long mask = ((unsigned long long) 1 << size) - 1;
+  return CONST_INT_P (op) && (UINTVAL (op) == mask);
+})
+
 (define_predicate "subreg_lowpart_operator"
   (ior (match_code "truncate")
        (and (match_code "subreg")
diff --git a/gcc/testsuite/gcc.dg/combine-cmlt-2.c b/gcc/testsuite/gcc.dg/combine-cmlt-2.c
new file mode 100755
index 0000000000000000000000000000000000000000..bb6a92b2d8c213baa6a256e3a53924ed8a3c4db8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-cmlt-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -mcmlt-arith -mcpu=hip12" } */
+
+/* The test checks usage of cmlt insns for arithmetic/logic calculations
+ * in foo (). It's inspired by sources of x264 codec. */
+
+typedef unsigned short int uint16_t;
+typedef unsigned int uint32_t;
+
+void foo( uint32_t *a, uint32_t *b)
+{
+  for (unsigned i = 0; i < 4; i++)
+  {
+    uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
+        &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
+    b[i] = (a[i]+s)^s;
+  }
+}
+
+/* { dg-final { scan-assembler-times {cmlt\t} 1 } } */
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index c56d0b9ff151725ce97a1ecca0476f034cb71a60..cf300d141214af1f8f201069a400d0314c416422 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -77,6 +77,7 @@ static hash_set<tree> * get_non_trapping ();
 static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree);
 static void hoist_adjacent_loads (basic_block, basic_block,
                                   basic_block, basic_block);
+static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
 static bool gate_hoist_loads (void);
 
 /* This pass tries to transform conditional stores into unconditional
@@ -266,6 +267,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
              hoist_adjacent_loads (bb, bb1, bb2, bb3);
              continue;
            }
+         else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2))
+           {
+             continue;
+           }
          else
            continue;
 
@@ -3767,6 +3772,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1,
     }
 }
 
+static bool check_uses (tree, hash_set<tree> *);
+
+/* Check SSA_NAME is used in
+     if (SSA_NAME == 0)
+       ...
+   or
+     if (SSA_NAME != 0)
+       ...
+*/
+static bool
+check_uses_cond (const_tree ssa_name, gimple *stmt,
+                 hash_set<tree> *hset ATTRIBUTE_UNUSED)
+{
+  tree_code code = gimple_cond_code (stmt);
+  if (code != EQ_EXPR && code != NE_EXPR)
+    {
+      return false;
+    }
+
+  tree lhs = gimple_cond_lhs (stmt);
+  tree rhs = gimple_cond_rhs (stmt);
+  if ((lhs == ssa_name && integer_zerop (rhs))
+      || (rhs == ssa_name && integer_zerop (lhs)))
+    {
+      return true;
+    }
+
+  return false;
+}
+
+/* Check SSA_NAME is used in
+     _tmp = SSA_NAME == 0;
+   or
+     _tmp = SSA_NAME != 0;
+   or
+     _tmp = SSA_NAME | _tmp2;
+*/
+static bool
+check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set<tree> *hset)
+{
+  tree_code code = gimple_assign_rhs_code (stmt);
+  tree lhs, rhs1, rhs2;
+
+  switch (code)
+    {
+    case EQ_EXPR:
+    case NE_EXPR:
+      rhs1 = gimple_assign_rhs1 (stmt);
+      rhs2 = gimple_assign_rhs2 (stmt);
+      if ((rhs1 == ssa_name && integer_zerop (rhs2))
+          || (rhs2 == ssa_name && integer_zerop (rhs1)))
+        {
+          return true;
+        }
+      break;
+
+    case BIT_IOR_EXPR:
+      lhs = gimple_assign_lhs (stmt);
+      if (hset->contains (lhs))
+        {
+          return false;
+        }
+      /* We should check the use of _tmp further. */
+      return check_uses (lhs, hset);
+
+    default:
+      break;
+    }
+  return false;
+}
+
+/* Check SSA_NAME is used in
+     # result = PHI <SSA_NAME (bb 1), 0 (bb 2)>
+*/
+static bool
+check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set<tree> *hset)
+{
+  for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++)
+    {
+      tree arg = gimple_phi_arg_def (stmt, i);
+      if (!integer_zerop (arg) && arg != ssa_name)
+        {
+          return false;
+        }
+    }
+
+  tree result = gimple_phi_result (stmt);
+
+  /* It is used to avoid infinite recursion,
+
+     if (cond)
+       goto <bb 2>
+     else
+       goto <bb 3>
+
+     <bb 2>
+     # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)>
+     {BODY}
+     if (cond)
+       goto <bb 3>
+     else
+       goto <bb 4>
+
+     <bb 3>
+     # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)>
+     {BODY}
+     if (cond)
+       goto <bb 2>
+     else
+       goto <bb 4>
+
+     <bb 4>
+     ...
+  */
+  if (hset->contains (result))
+    {
+      return false;
+    }
+
+  return check_uses (result, hset);
+}
+
+/* Check the use of SSA_NAME, it should only be used in comparison
+   operation and PHI node. HSET is used to record the ssa_names
+   that have been already checked. */
+static bool
+check_uses (tree ssa_name, hash_set<tree> *hset)
+{
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+
+  if (TREE_CODE (ssa_name) != SSA_NAME)
+    {
+      return false;
+    }
+
+  if (SSA_NAME_VAR (ssa_name)
+      && is_global_var (SSA_NAME_VAR (ssa_name)))
+    {
+      return false;
+    }
+
+  hset->add (ssa_name);
+
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name)
+    {
+      gimple *stmt = USE_STMT (use_p);
+
+      /* Ignore debug gimple statements. */
+      if (is_gimple_debug (stmt))
+        {
+          continue;
+        }
+
+      switch (gimple_code (stmt))
+        {
+        case GIMPLE_COND:
+          if (!check_uses_cond (ssa_name, stmt, hset))
+            {
+              return false;
+            }
+          break;
+
+        case GIMPLE_ASSIGN:
+          if (!check_uses_assign (ssa_name, stmt, hset))
+            {
+              return false;
+            }
+          break;
+
+        case GIMPLE_PHI:
+          if (!check_uses_phi (ssa_name, stmt, hset))
+            {
+              return false;
+            }
+          break;
+
+        default:
+          return false;
+        }
+    }
+  return true;
+}
+
+static bool
+check_def_gimple (gimple *def1, gimple *def2, const_tree result)
+{
+  /* def1 and def2 should be POINTER_PLUS_EXPR. */
+  if (!is_gimple_assign (def1) || !is_gimple_assign (def2)
+      || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR
+      || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR)
+    {
+      return false;
+    }
+
+  tree rhs12 = gimple_assign_rhs2 (def1);
+
+  tree rhs21 = gimple_assign_rhs1 (def2);
+  tree rhs22 = gimple_assign_rhs2 (def2);
+
+  if (rhs21 != result)
+    {
+      return false;
+    }
+
+  /* We should have a positive pointer-plus constant to ensure
+     that the pointer value is continuously increasing. */
+  if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST
+      || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0)
+    {
+      return false;
+    }
+
+  return true;
+}
+
+static bool
+check_loop_body (basic_block bb0, basic_block bb2, const_tree result)
+{
+  gimple *g01 = first_stmt (bb0);
+  if (!g01 || !is_gimple_assign (g01)
+      || gimple_assign_rhs_code (g01) != MEM_REF
+      || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result)
+    {
+      return false;
+    }
+
+  gimple *g02 = g01->next;
+  /* GIMPLE_COND would be the last gimple in a basic block,
+     and have no other side effects on RESULT. */
+  if (!g02 || gimple_code (g02) != GIMPLE_COND)
+    {
+      return false;
+    }
+
+  if (first_stmt (bb2) != last_stmt (bb2))
+    {
+      return false;
+    }
+
+  return true;
+}
+
+/* Pattern is like
+   <prebb>
+   arg1 = base (rhs11) + cst (rhs12); [def1]
+   goto <bb0>
+
+   <bb2>
+   arg2 = result (rhs21) + cst (rhs22); [def2]
+
+   <bb0>
+   # result = PHI <arg1 (prebb), arg2 (bb2)>
+   _v = *result;  [g01]
+   if (_v == 0)   [g02]
+     goto <bb1>
+   else
+     goto <bb2>
+
+   <bb1>
+   _2 = _1 /[ex] cst;      [g2]
+   _3 = (unsigned int) _2; [g3]
+   if (_3 == 0)
+   ...
+*/
+static bool
+check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
+		gphi *phi_stmt, gimple *&output)
+{
+  /* Start check from PHI node in BB0.  */
+  if (gimple_phi_num_args (phi_stmt) != 2
+      || virtual_operand_p (gimple_phi_result (phi_stmt)))
+    {
+      return false;
+    }
+
+  tree result = gimple_phi_result (phi_stmt);
+  tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
+  tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
+
+  if (TREE_CODE (arg1) != SSA_NAME
+      || TREE_CODE (arg2) != SSA_NAME
+      || SSA_NAME_IS_DEFAULT_DEF (arg1)
+      || SSA_NAME_IS_DEFAULT_DEF (arg2))
+    {
+      return false;
+    }
+
+  gimple *def1 = SSA_NAME_DEF_STMT (arg1);
+  gimple *def2 = SSA_NAME_DEF_STMT (arg2);
+
+  /* Swap bb1 and bb2 if pattern is like
+     if (_v != 0)
+       goto <bb2>
+     else
+       goto <bb1>
+  */
+  if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
+    {
+      std::swap (bb1, bb2);
+    }
+
+  /* prebb[def1] --> bb0 <-- bb2[def2] */
+  if (!gimple_bb (def1)
+      || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
+      || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
+    {
+      return false;
+    }
+
+  /* Check whether define gimple meets the pattern requirements.  */
+  if (!check_def_gimple (def1, def2, result))
+    {
+      return false;
+    }
+
+  if (!check_loop_body (bb0, bb2, result))
+    {
+      return false;
+    }
+
+  output = def1;
+  return true;
+}
+
+/* Check pattern
+   <bb1>
+   _1 = result - base;     [g1]
+   _2 = _1 /[ex] cst;      [g2]
+   _3 = (unsigned int) _2; [g3]
+   if (_3 == 0)
+   ...
+*/
+static bool
+check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
+		    const_tree result, gimple *&output)
+{
+  gimple *g1 = first_stmt (bb1);
+  if (!g1 || !is_gimple_assign (g1)
+      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
+      || gimple_assign_rhs1 (g1) != result
+      || gimple_assign_rhs2 (g1) != base)
+    {
+      return false;
+    }
+
+  gimple *g2 = g1->next;
+  if (!g2 || !is_gimple_assign (g2)
+      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
+      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
+      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
+    {
+      return false;
+    }
+
+  /* INTEGER_CST cst in gimple def1.  */
+  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
+  /* INTEGER_CST cst in gimple g2.  */
+  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
+  /* _2 must be at least a positive number.  */
+  if (num2 == 0 || num1 / num2 <= 0)
+    {
+      return false;
+    }
+
+  gimple *g3 = g2->next;
+  if (!g3 || !is_gimple_assign (g3)
+      || gimple_assign_rhs_code (g3) != NOP_EXPR
+      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
+      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
+    {
+      return false;
+    }
+
+  /* _3 should only be used in comparison operation or PHI node.  */
+  hash_set<tree> *hset = new hash_set<tree>;
+  if (!check_uses (gimple_assign_lhs (g3), hset))
+    {
+      delete hset;
+      return false;
+    }
+  delete hset;
+
+  output = g3;
+  return true;
+}
+
+static bool
+do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
+{
+  gphi_iterator gsi;
+
+  for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gphi *phi_stmt = gsi.phi ();
+      gimple *def1 = NULL;
+      tree base, cst, result;
+
+      if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
+	{
+	  continue;
+	}
+
+      base = gimple_assign_rhs1 (def1);
+      cst = gimple_assign_rhs2 (def1);
+      result = gimple_phi_result (phi_stmt);
+
+      gimple *stmt = NULL;
+      if (!check_gimple_order (bb1, base, cst, result, stmt))
+	{
+	  continue;
+	}
+
+      gcc_assert (stmt);
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
+	  print_gimple_stmt (dump_file, stmt, 0);
+	  fprintf (dump_file, "to\n");
+	}
+
+      /* Rewrite statement
+	   _3 = (unsigned int) _2;
+	 to
+	   _3 = (unsigned int) 1;
+      */
+      tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+      gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
+      update_stmt (stmt);
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  print_gimple_stmt (dump_file, stmt, 0);
+	  fprintf (dump_file, "\n");
+	}
+
+      return true;
+    }
+  return false;
+}
+
 /* Determine whether we should attempt to hoist adjacent loads out of
    diamond patterns in pass_phiopt.  Always hoist loads if
    -fhoist-adjacent-loads is specified and the target machine has
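
Note (editorial, not part of the patch): the tree-ssa-phiopt changes above only fire when a counted pointer walk feeds nothing but zero-comparisons. Below is a minimal C sketch of source with that shape, compiled with -floop-elim in mind; the function name, the uint16_t element type and the GIMPLE spelled out in the comments are illustrative assumptions, not taken from the patch or its testsuite.

#include <stdint.h>

/* A loop of the shape check_bb_order/check_gimple_order look for:
   p is advanced by a constant, *p is tested against zero, and the
   element count derived from (p - base) is only ever compared with 0.
   Because p starts at base + 1 and only grows, that count is >= 1, so
   the pass may rewrite "_3 = (unsigned int) _2" into
   "_3 = (unsigned int) 1"; later passes can then drop the division
   and, if nothing else uses it, the now-redundant loop.  */
int
scanned_anything (const uint16_t *base)
{
  const uint16_t *p = base + 1;   /* arg1 = base + cst, cst > 0   [def1] */
  while (*p != 0)                 /* _v = *result; GIMPLE_COND  [g01/g02] */
    p++;                          /* arg2 = result + cst          [def2] */
  unsigned int n = (unsigned int) (p - base);  /* POINTER_DIFF, /[ex], NOP */
  return n != 0;                  /* only use of n: a comparison with zero */
}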