From fe8ffc55445fd570565f50d2f59f42cca455e7e1 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 21 Apr 2016 15:39:47 +1000 Subject: [PATCH] noodle: use SSE palignr --- src/hwlm/noodle_engine.c | 3 ++- src/hwlm/noodle_engine_sse.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index 621f89b0..e2f80a59 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,6 +37,7 @@ #include "util/compare.h" #include "util/masked_move.h" #include "util/simd_utils.h" +#include "util/simd_utils_ssse3.h" #include #include diff --git a/src/hwlm/noodle_engine_sse.c b/src/hwlm/noodle_engine_sse.c index 956fd82e..b3673246 100644 --- a/src/hwlm/noodle_engine_sse.c +++ b/src/hwlm/noodle_engine_sse.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -190,8 +190,8 @@ hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key, m128 v = noCase ? and128(load128(d), caseMask) : load128(d); m128 z1 = eq128(mask1, v); m128 z2 = eq128(mask2, v); - u32 z = movemask128(and128(or128(lastz1, shiftLeft8Bits(z1)), z2)); - lastz1 = _mm_srli_si128(z1, 15); + u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2)); + lastz1 = z1; // On large packet buffers, this prefetch appears to get us about 2%. __builtin_prefetch(d + 128);