From 574c4e57b1467225f03936342e477ee0d587a2dc Mon Sep 17 00:00:00 2001
From: wdvxdr
Date: Wed, 17 Nov 2021 13:32:26 +0800
Subject: [PATCH] perf(tea): unrolling encode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

name         old time/op    new time/op    delta
TEAen/16-8      237ns ± 0%     227ns ± 1%  -4.03%  (p=0.008 n=5+5)
TEAen/256-8    1.69µs ± 0%    1.60µs ± 1%  -4.83%  (p=0.008 n=5+5)
TEAen/4K-8     25.0µs ± 1%    23.8µs ± 2%  -4.73%  (p=0.008 n=5+5)
TEAen/32K-8     199µs ± 1%     188µs ± 1%  -5.30%  (p=0.008 n=5+5)
TEAde/16-8      196ns ± 1%     194ns ± 1%  -0.89%  (p=0.016 n=5+5)
TEAde/256-8    1.56µs ± 0%    1.54µs ± 0%  -1.50%  (p=0.008 n=5+5)
TEAde/4K-8     23.5µs ± 0%    23.2µs ± 0%  -1.46%  (p=0.008 n=5+5)
TEAde/32K-8     187µs ± 1%     184µs ± 1%  -1.50%  (p=0.008 n=5+5)

name         old speed      new speed      delta
TEAen/16-8   67.6MB/s ± 1%  70.5MB/s ± 1%  +4.20%  (p=0.008 n=5+5)
TEAen/256-8   152MB/s ± 0%   160MB/s ± 1%  +5.07%  (p=0.008 n=5+5)
TEAen/4K-8    164MB/s ± 1%   172MB/s ± 2%  +4.98%  (p=0.008 n=5+5)
TEAen/32K-8   165MB/s ± 1%   174MB/s ± 1%  +5.60%  (p=0.008 n=5+5)
TEAde/16-8    163MB/s ± 1%   165MB/s ± 1%  +0.90%  (p=0.016 n=5+5)
TEAde/256-8   174MB/s ± 0%   177MB/s ± 0%  +1.51%  (p=0.008 n=5+5)
TEAde/4K-8    175MB/s ± 0%   177MB/s ± 0%  +1.48%  (p=0.008 n=5+5)
TEAde/32K-8   175MB/s ± 1%   178MB/s ± 1%  +1.52%  (p=0.008 n=5+5)
---
 binary/tea.go      | 122 +++++++++++++++++++++++++--------------------
 binary/tea_test.go |  12 +++++
 2 files changed, 79 insertions(+), 55 deletions(-)

diff --git a/binary/tea.go b/binary/tea.go
index 1465f6a4..371dac04 100644
--- a/binary/tea.go
+++ b/binary/tea.go
@@ -48,32 +48,44 @@ func (t TEA) Decrypt(data []byte) []byte {
 	return dst[dst[0]&7+3 : len(data)-7]
 }
 
-var sumTable = [0x10]uint32{
-	0x9e3779b9,
-	0x3c6ef372,
-	0xdaa66d2b,
-	0x78dde6e4,
-	0x1715609d,
-	0xb54cda56,
-	0x5384540f,
-	0xf1bbcdc8,
-	0x8ff34781,
-	0x2e2ac13a,
-	0xcc623af3,
-	0x6a99b4ac,
-	0x08d12e65,
-	0xa708a81e,
-	0x454021d7,
-	0xe3779b90,
-}
-
 //go:nosplit
 func (t *TEA) encode(n uint64) uint64 {
 	v0, v1 := uint32(n>>32), uint32(n)
-	for i := 0; i < 0x10; i++ {
-		v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
-		v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
-	}
+	t0, t1, t2, t3 := t[0], t[1], t[2], t[3]
+
+	v0 += (v1 + 0x9e3779b9) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x9e3779b9) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x3c6ef372) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x3c6ef372) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x78dde6e4) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x78dde6e4) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x1715609d) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x1715609d) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xb54cda56) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xb54cda56) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x5384540f) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x5384540f) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x8ff34781) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x8ff34781) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xcc623af3) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xcc623af3) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x08d12e65) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x08d12e65) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xa708a81e) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xa708a81e) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x454021d7) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x454021d7) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xe3779b90) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xe3779b90) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+
 	return uint64(v0)<<32 | uint64(v1)
 }
 
@@ -83,38 +95,38 @@ func (t *TEA) decode(n uint64) uint64 {
 	v0, v1 := uint32(n>>32), uint32(n)
 	t0, t1, t2, t3 := t[0], t[1], t[2], t[3]
 
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0xe3779b90) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0xe3779b90) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x454021d7) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x454021d7) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0xa708a81e) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0xa708a81e) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x8d12e65) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x8d12e65) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x6a99b4ac) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x6a99b4ac) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0xcc623af3) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0xcc623af3) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x2e2ac13a) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x2e2ac13a) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x8ff34781) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x8ff34781) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0xf1bbcdc8) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0xf1bbcdc8) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x5384540f) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x5384540f) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0xb54cda56) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0xb54cda56) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x1715609d) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x1715609d) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x78dde6e4) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x78dde6e4) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0xdaa66d2b) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0xdaa66d2b) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x3c6ef372) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x3c6ef372) ^ ((v1 >> 5) + t1)
-	v1 -= ((v0 << 4) + t2) ^ (v0 + 0x9e3779b9) ^ ((v0 >> 5) + t3)
-	v0 -= ((v1 << 4) + t0) ^ (v1 + 0x9e3779b9) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
 
 	return uint64(v0)<<32 | uint64(v1)
 }
diff --git a/binary/tea_test.go b/binary/tea_test.go
index b455b8c7..62bf3a9a 100644
--- a/binary/tea_test.go
+++ b/binary/tea_test.go
@@ -137,3 +137,15 @@ func BenchmarkTEAde(b *testing.B) {
 		benchDecrypt(b, data)
 	})
 }
+
+func BenchmarkTEA_encode(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		testTEA.encode(114514)
+	}
+}
+
+func BenchmarkTEA_decode(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		testTEA.decode(114514)
+	}
+}