1
0
mirror of https://github.com/Mrs4s/MiraiGo.git synced 2025-05-04 11:07:40 +08:00

perf: speed up tea encrypt & decrypt

name         old time/op    new time/op    delta
TEAen/16-8      260ns ± 0%     245ns ± 0%   -5.83%  (p=0.008 n=5+5)
TEAen/256-8    1.86µs ± 0%    1.73µs ± 0%   -6.86%  (p=0.008 n=5+5)
TEAen/4K-8     27.8µs ± 0%    24.9µs ± 0%  -10.19%  (p=0.008 n=5+5)
TEAde/16-8      223ns ± 1%     215ns ± 1%   -3.24%  (p=0.008 n=5+5)
TEAde/256-8    1.88µs ± 0%    1.70µs ± 0%   -9.49%  (p=0.000 n=4+5)
TEAde/4K-8     28.1µs ± 1%    24.9µs ± 1%  -11.43%  (p=0.008 n=5+5)

name         old speed      new speed      delta
TEAen/16-8   61.6MB/s ± 0%  65.4MB/s ± 0%   +6.19%  (p=0.008 n=5+5)
TEAen/256-8   138MB/s ± 0%   148MB/s ± 0%   +7.35%  (p=0.008 n=5+5)
TEAen/4K-8    147MB/s ± 0%   164MB/s ± 0%  +11.34%  (p=0.008 n=5+5)
TEAde/16-8    144MB/s ± 1%   149MB/s ± 1%   +3.34%  (p=0.008 n=5+5)
TEAde/256-8   145MB/s ± 0%   160MB/s ± 0%  +10.47%  (p=0.016 n=4+5)
TEAde/4K-8    146MB/s ± 1%   165MB/s ± 1%  +12.90%  (p=0.008 n=5+5)
This commit is contained in:
wdvxdr 2021-11-12 11:37:29 +08:00
parent b85fc25cd5
commit 2a92b2755f
No known key found for this signature in database
GPG Key ID: 703F8C071DE7A1B6
6 changed files with 77 additions and 74 deletions

View File

@ -3,7 +3,12 @@ package binary
import (
"encoding/binary"
"math/rand"
"reflect"
"unsafe"
"github.com/segmentio/asm/bswap"
"github.com/Mrs4s/MiraiGo/internal/cpu"
)
func xorQ(a, b []byte, c []byte) { // MAGIC
@ -19,50 +24,31 @@ type TEA [4]uint32
func (t *TEA) Encrypt(src []byte) (dst []byte) {
lens := len(src)
fill := 10 - (lens+1)%8
tmp1 := make([]byte, 8) // 非纯src的数据
tmp2 := make([]byte, 8)
dst = make([]byte, fill+lens+7)
// for i := 0; i < fill; i++ {
// dst[i] = ' '
// } // For test purpose
_, _ = rand.Read(dst[0:fill])
dst[0] = byte(fill-3) | 0xF8 // 存储pad长度
in := 0 // 位置
// #1
if fill < 8 {
in = 8 - fill
copy(dst[fill:8], src[:in])
copy(dst[fill:], src)
if cpu.LittleEndian {
bswap.Swap64(dst)
}
copy(tmp2, dst[0:8])
t.encode(dst[0:8], dst[0:8])
out := 8 // 位置
// #2
if fill > 8 {
copy(dst[fill:out+8], src[:16-fill])
xorQ(dst[8:16], dst[0:8], dst[8:16]) // 与前一次结果xor
copy(tmp1, dst[8:16])
t.encode(dst[8:16], dst[8:16])
xorQ(dst[8:16], tmp2, dst[8:16]) // 与前一次数据xor
copy(tmp2, tmp1)
in = 16 - fill
out = 16
var iv1, iv2, holder int64
var blocks []int64
dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
blocksHeader.Data = dstHeader.Data
blocksHeader.Len = dstHeader.Len / 8
blocksHeader.Cap = blocksHeader.Len
for i, block := range blocks {
holder = block ^ iv1
iv1 = t.encode(holder)
iv1 = iv1 ^ iv2
iv2 = holder
blocks[i] = iv1
}
// #3+或#4+
lens -= 8
for in < lens {
xorQ(src[in:in+8], dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
copy(tmp1, dst[out:out+8])
t.encode(dst[out:out+8], dst[out:out+8])
xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
copy(tmp2, tmp1)
in += 8
out += 8
if cpu.LittleEndian {
bswap.Swap64(dst)
}
tmp3 := make([]byte, 8)
copy(tmp3, src[in:])
xorQ(tmp3, dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
t.encode(dst[out:out+8], dst[out:out+8])
xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
return dst
}
@ -72,39 +58,31 @@ func (t *TEA) Decrypt(data []byte) []byte {
}
dst := make([]byte, len(data))
copy(dst, data)
t.decode(dst[0:8], dst[0:8])
tmp := make([]byte, 8)
copy(tmp, dst[0:8])
for in := 8; in < len(data); in += 8 {
xorQ(dst[in:in+8], tmp, dst[in:in+8])
t.decode(dst[in:in+8], dst[in:in+8])
xorQ(dst[in:in+8], data[in-8:in], dst[in:in+8])
xorQ(dst[in:in+8], data[in-8:in], tmp)
if cpu.LittleEndian {
bswap.Swap64(dst)
}
var iv1, iv2, holder, tmp int64
var blocks []int64
dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
blocksHeader.Data = dstHeader.Data
blocksHeader.Len = dstHeader.Len / 8
blocksHeader.Cap = blocksHeader.Len
for i, block := range blocks {
tmp = t.decode(block ^ iv2)
iv2 = tmp
holder = tmp ^ iv1
iv1 = block
blocks[i] = holder
}
if cpu.LittleEndian {
bswap.Swap64(dst)
}
return dst[dst[0]&7+3 : len(data)-7]
}
// unpack reads the first eight bytes of data as two big-endian 32-bit words:
// v0 is assembled from data[0..3] and v1 from data[4..7]. Any bytes past the
// eighth are ignored. It panics if data holds fewer than eight bytes.
//
//go:nosplit
func unpack(data []byte) (v0, v1 uint32) {
	_ = data[7] // fail on short input before any byte is read
	v0 = uint32(data[0])<<24 | uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3])
	v1 = uint32(data[4])<<24 | uint32(data[5])<<16 | uint32(data[6])<<8 | uint32(data[7])
	return v0, v1
}
// repack stores v0 and v1 into the first eight bytes of data as big-endian
// 32-bit words: v0 fills data[0..3] and v1 fills data[4..7]. Bytes past the
// eighth are left untouched. It panics if data holds fewer than eight bytes.
//
//go:nosplit
func repack(data []byte, v0, v1 uint32) {
	// The highest index is written first, so a too-short slice panics here
	// before any byte of data has been modified.
	data[7] = byte(v1)
	data[6] = byte(v1 >> 8)
	data[5] = byte(v1 >> 16)
	data[4] = byte(v1 >> 24)
	data[3] = byte(v0)
	data[2] = byte(v0 >> 8)
	data[1] = byte(v0 >> 16)
	data[0] = byte(v0 >> 24)
}
var sumTable = [0x10]uint32{
0x9e3779b9,
0x3c6ef372,
@ -125,24 +103,24 @@ var sumTable = [0x10]uint32{
}
//go:nosplit
func (t *TEA) encode(src, dst []byte) {
v0, v1 := unpack(src)
func (t *TEA) encode(n int64) int64 {
v0, v1 := uint32(n>>32), uint32(n)
for i := 0; i < 0x10; i++ {
v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
}
repack(dst, v0, v1)
return int64(v0)<<32 | int64(v1)
}
// 每次8字节
//go:nosplit
func (t *TEA) decode(src, dst []byte) {
v0, v1 := unpack(src)
func (t *TEA) decode(n int64) int64 {
v0, v1 := uint32(n>>32), uint32(n)
for i := 0xf; i >= 0; i-- {
v1 -= ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
v0 -= ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
}
repack(dst, v0, v1)
return int64(v0)<<32 | int64(v1)
}
//go:nosplit

View File

@ -110,7 +110,11 @@ func BenchmarkTEAen(b *testing.B) {
benchEncrypt(b, data)
})
b.Run("4K", func(b *testing.B) {
data := make([]byte, 4096)
data := make([]byte, 1024*4)
benchEncrypt(b, data)
})
b.Run("32K", func(b *testing.B) {
data := make([]byte, 1024*32)
benchEncrypt(b, data)
})
}
@ -128,4 +132,8 @@ func BenchmarkTEAde(b *testing.B) {
data := make([]byte, 4096)
benchDecrypt(b, data)
})
b.Run("32K", func(b *testing.B) {
data := make([]byte, 1024*32)
benchDecrypt(b, data)
})
}

1
go.mod
View File

@ -5,6 +5,7 @@ go 1.16
require (
github.com/klauspost/compress v1.13.6
github.com/pkg/errors v0.9.1
github.com/segmentio/asm v1.1.0
github.com/stretchr/testify v1.3.0
github.com/tidwall/gjson v1.11.0
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f

4
go.sum
View File

@ -5,10 +5,14 @@ github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/segmentio/asm v1.1.0 h1:fkVr8k5J4sKoFjTGVD6r1yKvDKqmvrEh3K7iyVxgBs8=
github.com/segmentio/asm v1.1.0/go.mod h1:4EUJGaKsB8ImLUwOGORVsNd9vTRDeh44JGsY4aKp5I4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

View File

@ -0,0 +1,6 @@
//go:build armbe || arm64be || mips || mips64 || ppc || ppc64 || s390 || s390x || sparc || sparc64
// +build armbe arm64be mips mips64 ppc ppc64 s390 s390x sparc sparc64
package cpu
// LittleEndian is false in this file, which is compiled only for the GOARCH
// values named in its build constraint (armbe, arm64be, mips, mips64, ppc,
// ppc64, s390, s390x, sparc, sparc64).
const LittleEndian = false

View File

@ -0,0 +1,6 @@
//go:build 386 || amd64 || arm || arm64 || mipsle || mips64le || ppc64le || riscv || riscv64 || wasm
// +build 386 amd64 arm arm64 mipsle mips64le ppc64le riscv riscv64 wasm
package cpu
// LittleEndian is true in this file, which is compiled only for the GOARCH
// values named in its build constraint (386, amd64, arm, arm64, mipsle,
// mips64le, ppc64le, riscv, riscv64, wasm).
//
// NOTE(review): a GOARCH that appears in neither this constraint nor the
// sibling file defining LittleEndian = false (for example loong64) would leave
// the constant undefined — confirm whether such targets need to be supported.
const LittleEndian = true