Skip to content

Commit

Permalink
SSSE3: enc: factor encoding loop into inline function
Browse files Browse the repository at this point in the history
  • Loading branch information
aklomp committed Nov 23, 2019
1 parent eecda43 commit a5b6739
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 20 deletions.
3 changes: 2 additions & 1 deletion lib/arch/avx/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
 #include "../ssse3/dec_reshuffle.c"
 #include "../ssse3/enc_translate.c"
 #include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"

 #endif // HAVE_AVX

 BASE64_ENC_FUNCTION(avx)
 {
 #if HAVE_AVX
 #include "../generic/enc_head.c"
-#include "../ssse3/enc_loop.c"
+enc_loop_ssse3(&c, &srclen, &o, &outl);
 #include "../generic/enc_tail.c"
 #else
 BASE64_ENC_STUB
Expand Down
3 changes: 2 additions & 1 deletion lib/arch/sse41/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
 #include "../ssse3/dec_reshuffle.c"
 #include "../ssse3/enc_translate.c"
 #include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"

 #endif // HAVE_SSE41

 BASE64_ENC_FUNCTION(sse41)
 {
 #if HAVE_SSE41
 #include "../generic/enc_head.c"
-#include "../ssse3/enc_loop.c"
+enc_loop_ssse3(&c, &srclen, &o, &outl);
 #include "../generic/enc_tail.c"
 #else
 BASE64_ENC_STUB
Expand Down
3 changes: 2 additions & 1 deletion lib/arch/sse42/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
 #include "../ssse3/dec_reshuffle.c"
 #include "../ssse3/enc_translate.c"
 #include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"

 #endif // HAVE_SSE42

 BASE64_ENC_FUNCTION(sse42)
 {
 #if HAVE_SSE42
 #include "../generic/enc_head.c"
-#include "../ssse3/enc_loop.c"
+enc_loop_ssse3(&c, &srclen, &o, &outl);
 #include "../generic/enc_tail.c"
 #else
 BASE64_ENC_STUB
Expand Down
3 changes: 2 additions & 1 deletion lib/arch/ssse3/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
 #include "dec_reshuffle.c"
 #include "enc_reshuffle.c"
 #include "enc_translate.c"
+#include "enc_loop.c"

 #endif // HAVE_SSSE3

 BASE64_ENC_FUNCTION(ssse3)
 {
 #if HAVE_SSSE3
 #include "../generic/enc_head.c"
-#include "enc_loop.c"
+enc_loop_ssse3(&c, &srclen, &o, &outl);
 #include "../generic/enc_tail.c"
 #else
 BASE64_ENC_STUB
Expand Down
44 changes: 28 additions & 16 deletions lib/arch/ssse3/enc_loop.c
Original file line number Diff line number Diff line change
@@ -1,22 +1,34 @@
-// If we have SSSE3 support, pick off 12 bytes at a time for as long as we can.
-// But because we read 16 bytes at a time, ensure we have enough room to do a
-// full 16-byte read without segfaulting:
-while (srclen >= 16)
+static inline void
+enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
 {
-// Load string:
-__m128i str = _mm_loadu_si128((__m128i *)c);
+if (*slen < 16) {
+return;
+}

-// Reshuffle:
-str = enc_reshuffle(str);
+// Process blocks of 12 bytes at a time. Because blocks are loaded 16
+// bytes at a time, ensure that there will be at least 4 remaining
+// bytes after the last round, so that the final read will not pass
+// beyond the bounds of the input buffer:
+size_t rounds = (*slen - 4) / 12;

-// Translate reshuffled bytes to the Base64 alphabet:
-str = enc_translate(str);
+*slen -= rounds * 12; // 12 bytes consumed per round
+*olen += rounds * 16; // 16 bytes produced per round

-// Store:
-_mm_storeu_si128((__m128i *)o, str);
+do {
+// Load string:
+__m128i str = _mm_loadu_si128((__m128i *) *s);

-c += 12; // 3 * 4 bytes of input
-o += 16; // 4 * 4 bytes of output
-outl += 16;
-srclen -= 12;
+// Reshuffle:
+str = enc_reshuffle(str);
+
+// Translate reshuffled bytes to the Base64 alphabet:
+str = enc_translate(str);
+
+// Store:
+_mm_storeu_si128((__m128i *) *o, str);
+
+*s += 12;
+*o += 16;
+
+} while (--rounds > 0);
 }

0 comments on commit a5b6739

Please sign in to comment.