diff mbox

[PULL,57/58] cutils: Add SSE4 version

Message ID: 1473786989-54823-58-git-send-email-pbonzini@redhat.com
State: New
Headers show

Commit Message

Paolo Bonzini - Sept. 13, 2016, 5:16 p.m. UTC
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/bufferiszero.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff mbox

Patch

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 4af3caa..bafd3d1 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -114,6 +114,13 @@  ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO)
 
 #ifdef CONFIG_AVX2_OPT
 #pragma GCC push_options
+#pragma GCC target("sse4")
+#include <smmintrin.h>
+#define SSE4_NONZERO(X)  !_mm_testz_si128((X), (X))
+ACCEL_BUFFER_ZERO(buffer_zero_sse4, 64, __m128i, SSE4_NONZERO)
+#pragma GCC pop_options
+
+#pragma GCC push_options
 #pragma GCC target("avx2")
 #include <immintrin.h>
 #define AVX2_NONZERO(X)  !_mm256_testz_si256((X), (X))
@@ -182,6 +189,9 @@  static bool select_accel_fn(const void *buf, size_t len)
     if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) {
         return buffer_zero_avx2(buf, len);
     }
+    if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE4)) {
+        return buffer_zero_sse4(buf, len);
+    }
 #endif
     if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) {
         return buffer_zero_sse2(buf, len);