From 9888d7fa31082813aa31db067a84c749f8cd77b6 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Mon, 2 Sep 2024 15:29:26 +0900 Subject: [PATCH] perf(simd): avx2 fallback to swar instead of sse4.2 This has massive implications on the default runtime perf, improving how the code is lowered/inlined. (Falling back to SSE4.2 for a handful of bytes was wasteful). Should supersede #175, #156 --- src/simd/avx2.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/simd/avx2.rs b/src/simd/avx2.rs index 6a7edc1..c1a41f9 100644 --- a/src/simd/avx2.rs +++ b/src/simd/avx2.rs @@ -1,7 +1,7 @@ use crate::iter::Bytes; #[inline] -#[target_feature(enable = "avx2", enable = "sse4.2")] +#[target_feature(enable = "avx2")] pub unsafe fn match_uri_vectored(bytes: &mut Bytes) { while bytes.as_ref().len() >= 32 { let advance = match_url_char_32_avx(bytes.as_ref()); @@ -11,8 +11,8 @@ pub unsafe fn match_uri_vectored(bytes: &mut Bytes) { return; } } - // do both, since avx2 only works when bytes.len() >= 32 - super::sse42::match_uri_vectored(bytes) + // NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2 + super::swar::match_uri_vectored(bytes) } #[inline(always)] @@ -56,7 +56,7 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize { r.trailing_zeros() as usize } -#[target_feature(enable = "avx2", enable = "sse4.2")] +#[target_feature(enable = "avx2")] pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) { while bytes.as_ref().len() >= 32 { let advance = match_header_value_char_32_avx(bytes.as_ref()); @@ -66,8 +66,8 @@ pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) { return; } } - // do both, since avx2 only works when bytes.len() >= 32 - super::sse42::match_header_value_vectored(bytes) + // NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2 + super::swar::match_header_value_vectored(bytes) } #[inline(always)]