diff mbox series

Enhance optimize_atomic_bit_test_and to handle truncation.

Message ID 20211117052005.91656-1-hongtao.liu@intel.com
State New
Headers show
Series Enhance optimize_atomic_bit_test_and to handle truncation. | expand

Commit Message

liuhongt Nov. 17, 2021, 5:20 a.m. UTC
r12-5102-gfb161782545224f5 improves integer bit test on
__atomic_fetch_[or|and]_* returns only for nop_convert, i.e.

transform

  mask_5 = 1 << bit_4(D);
  mask.0_1 = (unsigned int) mask_5;
  _2 = __atomic_fetch_or_4 (a_7(D), mask.0_1, 0);
  t1_9 = (int) _2;
  t2_10 = mask_5 & t1_9;

to

  mask_5 = 1 << n_4(D);
  mask.1_1 = (unsigned int) mask_5;
  _11 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_1_4, n_4(D), 0);
  _8 = (int) _11;

And this patch extends the original patch to handle truncation,
i.e.

transform

  long int mask;
  mask_8 = 1 << n_7(D);
  mask.0_1 = (long unsigned int) mask_8;
  _2 = __sync_fetch_and_or_8 (&pscc_a_2_3, mask.0_1);
  _3 = (unsigned int) _2;
  _4 = (unsigned int) mask_8;
  _5 = _3 & _4;
  _6 = (int) _5;

to

  long int mask;
  mask_8 = 1 << n_7(D);
  mask.0_1 = (long unsigned int) mask_8;
  _14 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_2_3, n_7(D), 0);
  _5 = (unsigned int) _14;
  _6 = (int) _5;

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for trunk?

2021-11-17  Hongtao Liu  <hongtao.liu@intel.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

gcc/ChangeLog:

	PR tree-optimization/103194
	* match.pd (gimple_nop_atomic_bit_test_and_p): Extended to
	match truncation.
	* tree-ssa-ccp.c (gimple_nop_convert): Declare.
	(optimize_atomic_bit_test_and): Enhance
	optimize_atomic_bit_test_and to handle truncation.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr103194-2.c: New test.
	* gcc.target/i386/pr103194-3.c: New test.
	* gcc.target/i386/pr103194-4.c: New test.
	* gcc.target/i386/pr103194-5.c: New test.
	* gcc.target/i386/pr103194.c: New test.
---
 gcc/match.pd                               | 48 ++++++-----
 gcc/testsuite/gcc.target/i386/pr103194-2.c | 64 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194-3.c | 64 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194-4.c | 61 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194-5.c | 61 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr103194.c   | 16 ++++
 gcc/tree-ssa-ccp.c                         | 99 +++++++++++-----------
 7 files changed, 345 insertions(+), 68 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103194.c

Comments

Jeff Law Nov. 23, 2021, 5:56 p.m. UTC | #1
On 11/16/2021 10:20 PM, liuhongt via Gcc-patches wrote:
> r12-5102-gfb161782545224f5 improves integer bit test on
> __atomic_fetch_[or|and]_* returns only for nop_convert, .i.e.
>
> transfrom
>
>    mask_5 = 1 << bit_4(D);
>    mask.0_1 = (unsigned int) mask_5;
>    _2 = __atomic_fetch_or_4 (a_7(D), mask.0_1, 0);
>    t1_9 = (int) _2;
>    t2_10 = mask_5 & t1_9;
>
> to
>
>    mask_5 = 1 << n_4(D);
>    mask.1_1 = (unsigned int) mask_5;
>    _11 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_1_4, n_4(D), 0);
>    _8 = (int) _11;
>
> And this patch extend the original patch to handle truncation.
> .i.e.
>
> transform
>
>    long int mask;
>    mask_8 = 1 << n_7(D);
>    mask.0_1 = (long unsigned int) mask_8;
>    _2 = __sync_fetch_and_or_8 (&pscc_a_2_3, mask.0_1);
>    _3 = (unsigned int) _2;
>    _4 = (unsigned int) mask_8;
>    _5 = _3 & _4;
>    _6 = (int) _5;
>
> to
>
>    long int mask;
>    mask_8 = 1 << n_7(D);
>    mask.0_1 = (long unsigned int) mask_8;
>    _14 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_2_3, n_7(D), 0);
>    _5 = (unsigned int) _14;
>    _6 = (int) _5;
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> 2021-11-17  Hongtao Liu  <hongtao.liu@intel.com>
> 	    H.J. Lu  <hongjiu.lu@intel.com>
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/103194
> 	* match.pd (gimple_nop_atomic_bit_test_and_p): Extended to
> 	match truncation.
> 	* tree-ssa-ccp.c (gimple_nop_convert): Declare.
> 	(optimize_atomic_bit_test_and): Enhance
> 	optimize_atomic_bit_test_and to handle truncation.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/i386/pr103194-2.c: New test.
> 	* gcc.target/i386/pr103194-3.c: New test.
> 	* gcc.target/i386/pr103194-4.c: New test.
> 	* gcc.target/i386/pr103194-5.c: New test.
> 	* gcc.target/i386/pr103194.c: New test.
OK
jeff
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 7f76925b6c6..6c68534fff5 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4021,39 +4021,43 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 #if GIMPLE
 (match (nop_atomic_bit_test_and_p @0 @1 @4)
- (bit_and (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
+ (bit_and (convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
 	   INTEGER_CST@1)
  (with {
 	 int ibit = tree_log2 (@0);
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
- (bit_and (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
+ (bit_and (convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
 	  INTEGER_CST@1)
  (with {
 	 int ibit = tree_log2 (@0);
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
-  (nop_convert?@4
+  (convert1?@4
    (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
-  @0))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 (match (nop_atomic_bit_test_and_p @0 @0 @4)
  (bit_and:c
-  (nop_convert?@4
+  (convert1?@4
    (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
-  @0))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
- (bit_and@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
+ (bit_and@4 (convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
 	    INTEGER_CST@1)
  (with {
 	 int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
@@ -4061,11 +4065,12 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
 (match (nop_atomic_bit_test_and_p @0 @1 @3)
  (bit_and@4
-  (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
+  (convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
   INTEGER_CST@1)
  (with {
 	 int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)),
@@ -4073,19 +4078,22 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	 int ibit2 = tree_log2 (@1);
        }
   (if (ibit == ibit2
-      && ibit >= 0))))
+      && ibit >= 0
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))))
 
-(match (nop_atomic_bit_test_and_p @0 @0 @3)
+(match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
-  (nop_convert?@3
-   (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
-   @0))
+  (convert1?@3
+   (ATOMIC_FETCH_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7))) @5))
+  (convert2? @0))
+ (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
-(match (nop_atomic_bit_test_and_p @0 @0 @3)
+(match (nop_atomic_bit_test_and_p @4 @0 @3)
  (bit_and:c
-  (nop_convert?@3
-   (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
-   @0))
+  (convert1?@3
+   (SYNC_FETCH_AND_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7)))))
+  (convert2? @0))
+  (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))
 
 #endif
 
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-2.c b/gcc/testsuite/gcc.target/i386/pr103194-2.c
new file mode 100644
index 00000000000..1a991fe0199
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-2.c
@@ -0,0 +1,64 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE,MASK)						\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(char, short, 0);
+FOO(char, short, 7);
+FOO(short, int, 0);
+FOO(short, int, 15);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 8 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 16 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 8 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-3.c b/gcc/testsuite/gcc.target/i386/pr103194-3.c
new file mode 100644
index 00000000000..4907598bbd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-3.c
@@ -0,0 +1,64 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+typedef long long int64;
+
+#define FOO(RTYPE, TYPE,MASK)						\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a)			\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+
+FOO(int, int64, 1);
+FOO(int, int64, 31);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 8 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 4 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-4.c b/gcc/testsuite/gcc.target/i386/pr103194-4.c
new file mode 100644
index 00000000000..8573016c5d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-4.c
@@ -0,0 +1,61 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE)							\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1 << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(short, int);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 2 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194-5.c b/gcc/testsuite/gcc.target/i386/pr103194-5.c
new file mode 100644
index 00000000000..dfaddf0aa6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194-5.c
@@ -0,0 +1,61 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(RTYPE,TYPE)							\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)		\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask;	\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_or (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_xor (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_xor_and_fetch (a, mask) & mask;			\
+  }									\
+  __attribute__((noinline,noclone)) RTYPE				\
+  sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK)	\
+  {									\
+    TYPE mask = 1ll << MASK;						\
+    return __sync_fetch_and_and (a, ~mask) & mask;			\
+  }									\
+
+FOO(int, long);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 2 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103194.c b/gcc/testsuite/gcc.target/i386/pr103194.c
new file mode 100644
index 00000000000..a6d84332e4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103194.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+long pscc_a_2_3;
+int pscc_a_1_4;
+void pscc()
+{
+  pscc_a_1_4 = __sync_fetch_and_and(&pscc_a_2_3, 1);
+}
+
+static int si;
+long
+test_types (long n)
+{
+  unsigned int u2 = __atomic_fetch_xor (&si, 0, 5);
+  return u2;
+}
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 18d57729d8a..9e12da8f011 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3326,6 +3326,7 @@  convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
  */
 extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
 					      tree (*) (tree));
+extern bool gimple_nop_convert (tree, tree*, tree (*) (tree));
 
 /* Optimize
      mask_2 = 1 << cnt_1;
@@ -3462,16 +3463,16 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	  ibit = 0;
 	}
       else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
-	       == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+	       <= TYPE_PRECISION (TREE_TYPE (use_rhs)))
 	{
 	  gimple *use_nop_stmt;
 	  if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
 	      || !is_gimple_assign (use_nop_stmt))
 	    return;
+	  tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
 	  rhs_code = gimple_assign_rhs_code (use_nop_stmt);
 	  if (rhs_code != BIT_AND_EXPR)
 	    {
-	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
 	      if (TREE_CODE (use_nop_lhs) == SSA_NAME
 		  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
 		return;
@@ -3584,24 +3585,23 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	    }
 	  else
 	    {
-	      tree and_expr = gimple_assign_lhs (use_nop_stmt);
 	      tree match_op[3];
 	      gimple *g;
-	      if (!gimple_nop_atomic_bit_test_and_p (and_expr,
+	      if (!gimple_nop_atomic_bit_test_and_p (use_nop_lhs,
 						     &match_op[0], NULL)
 		  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2])
 		  || !single_imm_use (match_op[2], &use_p, &g)
 		  || !is_gimple_assign (g))
 		return;
-	      mask = match_op[1];
-	      if (TREE_CODE (mask) == INTEGER_CST)
+	      mask = match_op[0];
+	      if (TREE_CODE (match_op[1]) == INTEGER_CST)
 		{
-		  ibit = tree_log2 (mask);
+		  ibit = tree_log2 (match_op[1]);
 		  gcc_assert (ibit >= 0);
 		}
 	      else
 		{
-		  g = SSA_NAME_DEF_STMT (mask);
+		  g = SSA_NAME_DEF_STMT (match_op[1]);
 		  gcc_assert (is_gimple_assign (g));
 		  bit = gimple_assign_rhs2 (g);
 		}
@@ -3623,19 +3623,30 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 		 _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
 		 _12 = _3 & mask_7;
 		 _5 = (int) _12;
-	       */
-	      replace_uses_by (use_lhs, lhs);
-	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
-	      var = make_ssa_name (TREE_TYPE (use_nop_lhs));
-	      gimple_assign_set_lhs (use_nop_stmt, var);
+
+		 and Convert
+		 _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3);
+		 _2 = (short int) _1;
+		 _5 = _2 & mask;
+		 to
+		 _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3);
+		 _8 = _1 & mask;
+		 _5 = (short int) _8;
+	      */
+	      gimple_seq stmts = NULL;
+	      match_op[1] = gimple_convert (&stmts,
+					    TREE_TYPE (use_rhs),
+					    match_op[1]);
+	      var = gimple_build (&stmts, BIT_AND_EXPR,
+				  TREE_TYPE (use_rhs), use_rhs, match_op[1]);
 	      gsi = gsi_for_stmt (use_stmt);
 	      gsi_remove (&gsi, true);
 	      release_defs (use_stmt);
-	      gsi_remove (gsip, true);
-	      g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
+	      use_stmt = gimple_seq_last_stmt (stmts);
 	      gsi = gsi_for_stmt (use_nop_stmt);
-	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-	      use_stmt = use_nop_stmt;
+	      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+	      gimple_assign_set_rhs_with_ops (&gsi, CONVERT_EXPR, var);
+	      update_stmt (use_nop_stmt);
 	    }
 	}
       else
@@ -3671,55 +3682,47 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       else if (TREE_CODE (mask) == SSA_NAME)
 	{
 	  gimple *g = SSA_NAME_DEF_STMT (mask);
-	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	  tree match_op;
+	  if (gimple_nop_convert (mask, &match_op, NULL))
 	    {
-	      if (!is_gimple_assign (g)
-		  || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
-		return;
-	      mask = gimple_assign_rhs1 (g);
+	      mask = match_op;
 	      if (TREE_CODE (mask) != SSA_NAME)
 		return;
 	      g = SSA_NAME_DEF_STMT (mask);
 	    }
 	  if (!is_gimple_assign (g))
 	    return;
-	  rhs_code = gimple_assign_rhs_code (g);
-	  if (rhs_code != LSHIFT_EXPR)
-	    {
-	      if (rhs_code != NOP_EXPR)
-		return;
 
-	      /* Handle
-		 _1 = 1 << bit_4(D);
-		 mask_5 = (unsigned int) _1;
-		 _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
-		 _3 = _2 & mask_5;
-		 */
-	      tree nop_lhs = gimple_assign_lhs (g);
-	      tree nop_rhs = gimple_assign_rhs1 (g);
-	      if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
-		  != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
 		return;
-	      g = SSA_NAME_DEF_STMT (nop_rhs);
-	      if (!is_gimple_assign (g)
-		  || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+	      mask = gimple_assign_rhs1 (g);
+	      if (TREE_CODE (mask) != SSA_NAME)
 		return;
+	      g = SSA_NAME_DEF_STMT (mask);
 	    }
-	  if (!integer_onep (gimple_assign_rhs1 (g)))
+
+	  rhs_code = gimple_assign_rhs_code (g);
+	  if (rhs_code != LSHIFT_EXPR
+	      || !integer_onep (gimple_assign_rhs1 (g)))
 	    return;
 	  bit = gimple_assign_rhs2 (g);
 	}
       else
 	return;
 
+      tree cmp_mask;
       if (gimple_assign_rhs1 (use_stmt) == lhs)
-	{
-	  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
-	    return;
-	}
-      else if (gimple_assign_rhs2 (use_stmt) != lhs
-	       || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
-				    mask, 0))
+	cmp_mask = gimple_assign_rhs2 (use_stmt);
+      else
+	cmp_mask = gimple_assign_rhs1 (use_stmt);
+
+      tree match_op;
+      if (gimple_nop_convert (cmp_mask, &match_op, NULL))
+	cmp_mask = match_op;
+
+      if (!operand_equal_p (cmp_mask, mask, 0))
 	return;
     }