@@ -379,6 +379,8 @@ c_finish_omp_atomic (location_t loc, enu
if (SCALAR_FLOAT_TYPE_P (cmptype) && !test)
{
bool clear_padding = false;
+ HOST_WIDE_INT non_padding_start = 0;
+ HOST_WIDE_INT non_padding_end = 0;
if (BITS_PER_UNIT == 8 && CHAR_BIT == 8)
{
HOST_WIDE_INT sz = int_size_in_bytes (cmptype), i;
@@ -392,6 +394,40 @@ c_finish_omp_atomic (location_t loc, enu
clear_padding = true;
break;
}
+ if (clear_padding && buf[i] == 0)
+ {
+ /* Try to optimize. In the common case where
+ non-padding bits are all continuous and start
+ and end at a byte boundary, we can just adjust
+ the memcmp call arguments and don't need to
+ emit __builtin_clear_padding calls. */
+ if (i == 0)
+ {
+ for (i = 0; i < sz; i++)
+ if (buf[i] != 0)
+ break;
+ if (i < sz && buf[i] == (unsigned char) ~0)
+ {
+ non_padding_start = i;
+ for (; i < sz; i++)
+ if (buf[i] != (unsigned char) ~0)
+ break;
+ }
+ else
+ i = 0;
+ }
+ if (i != 0)
+ {
+ non_padding_end = i;
+ for (; i < sz; i++)
+ if (buf[i] != 0)
+ {
+ non_padding_start = 0;
+ non_padding_end = 0;
+ break;
+ }
+ }
+ }
}
tree inttype = NULL_TREE;
if (!clear_padding && tree_fits_uhwi_p (TYPE_SIZE (cmptype)))
@@ -428,12 +464,22 @@ c_finish_omp_atomic (location_t loc, enu
tmp2 = build4 (TARGET_EXPR, cmptype, tmp2,
TREE_OPERAND (rhs1, 1), NULL, NULL);
tmp2 = build1 (ADDR_EXPR, pcmptype, tmp2);
+ if (non_padding_start)
+ {
+ tmp1 = build2 (POINTER_PLUS_EXPR, pcmptype, tmp1,
+ size_int (non_padding_start));
+ tmp2 = build2 (POINTER_PLUS_EXPR, pcmptype, tmp2,
+ size_int (non_padding_start));
+ }
tree fndecl = builtin_decl_explicit (BUILT_IN_MEMCMP);
rhs1 = build_call_expr_loc (loc, fndecl, 3, tmp1, tmp2,
- TYPE_SIZE_UNIT (cmptype));
+ non_padding_end
+ ? size_int (non_padding_end
+ - non_padding_start)
+ : TYPE_SIZE_UNIT (cmptype));
rhs1 = build2 (EQ_EXPR, boolean_type_node, rhs1,
integer_zero_node);
- if (clear_padding)
+ if (clear_padding && non_padding_end == 0)
{
fndecl = builtin_decl_explicit (BUILT_IN_CLEAR_PADDING);
tree cp1 = build_call_expr_loc (loc, fndecl, 1, tmp1);