1
0
mirror of https://github.com/Mrs4s/MiraiGo.git synced 2025-05-04 19:17:38 +08:00

perf: speed up tea encrypt & decrypt

name         old time/op    new time/op    delta
TEAen/16-8      260ns ± 0%     245ns ± 0%   -5.83%  (p=0.008 n=5+5)
TEAen/256-8    1.86µs ± 0%    1.73µs ± 0%   -6.86%  (p=0.008 n=5+5)
TEAen/4K-8     27.8µs ± 0%    24.9µs ± 0%  -10.19%  (p=0.008 n=5+5)
TEAde/16-8      223ns ± 1%     215ns ± 1%   -3.24%  (p=0.008 n=5+5)
TEAde/256-8    1.88µs ± 0%    1.70µs ± 0%   -9.49%  (p=0.000 n=4+5)
TEAde/4K-8     28.1µs ± 1%    24.9µs ± 1%  -11.43%  (p=0.008 n=5+5)

name         old speed      new speed      delta
TEAen/16-8   61.6MB/s ± 0%  65.4MB/s ± 0%   +6.19%  (p=0.008 n=5+5)
TEAen/256-8   138MB/s ± 0%   148MB/s ± 0%   +7.35%  (p=0.008 n=5+5)
TEAen/4K-8    147MB/s ± 0%   164MB/s ± 0%  +11.34%  (p=0.008 n=5+5)
TEAde/16-8    144MB/s ± 1%   149MB/s ± 1%   +3.34%  (p=0.008 n=5+5)
TEAde/256-8   145MB/s ± 0%   160MB/s ± 0%  +10.47%  (p=0.016 n=4+5)
TEAde/4K-8    146MB/s ± 1%   165MB/s ± 1%  +12.90%  (p=0.008 n=5+5)
This commit is contained in:
wdvxdr 2021-11-12 11:37:29 +08:00
parent b85fc25cd5
commit 2a92b2755f
No known key found for this signature in database
GPG Key ID: 703F8C071DE7A1B6
6 changed files with 77 additions and 74 deletions

View File

@ -3,7 +3,12 @@ package binary
import ( import (
"encoding/binary" "encoding/binary"
"math/rand" "math/rand"
"reflect"
"unsafe" "unsafe"
"github.com/segmentio/asm/bswap"
"github.com/Mrs4s/MiraiGo/internal/cpu"
) )
func xorQ(a, b []byte, c []byte) { // MAGIC func xorQ(a, b []byte, c []byte) { // MAGIC
@ -19,50 +24,31 @@ type TEA [4]uint32
func (t *TEA) Encrypt(src []byte) (dst []byte) { func (t *TEA) Encrypt(src []byte) (dst []byte) {
lens := len(src) lens := len(src)
fill := 10 - (lens+1)%8 fill := 10 - (lens+1)%8
tmp1 := make([]byte, 8) // 非纯src的数据
tmp2 := make([]byte, 8)
dst = make([]byte, fill+lens+7) dst = make([]byte, fill+lens+7)
// for i := 0; i < fill; i++ {
// dst[i] = ' '
// } // For test purpose
_, _ = rand.Read(dst[0:fill]) _, _ = rand.Read(dst[0:fill])
dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 dst[0] = byte(fill-3) | 0xF8 // 存储pad长度
in := 0 // 位置 copy(dst[fill:], src)
// #1 if cpu.LittleEndian {
if fill < 8 { bswap.Swap64(dst)
in = 8 - fill
copy(dst[fill:8], src[:in])
} }
copy(tmp2, dst[0:8])
t.encode(dst[0:8], dst[0:8]) var iv1, iv2, holder int64
out := 8 // 位置 var blocks []int64
// #2 dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
if fill > 8 { blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
copy(dst[fill:out+8], src[:16-fill]) blocksHeader.Data = dstHeader.Data
xorQ(dst[8:16], dst[0:8], dst[8:16]) // 与前一次结果xor blocksHeader.Len = dstHeader.Len / 8
copy(tmp1, dst[8:16]) blocksHeader.Cap = blocksHeader.Len
t.encode(dst[8:16], dst[8:16]) for i, block := range blocks {
xorQ(dst[8:16], tmp2, dst[8:16]) // 与前一次数据xor holder = block ^ iv1
copy(tmp2, tmp1) iv1 = t.encode(holder)
in = 16 - fill iv1 = iv1 ^ iv2
out = 16 iv2 = holder
blocks[i] = iv1
} }
// #3+或#4+ if cpu.LittleEndian {
lens -= 8 bswap.Swap64(dst)
for in < lens {
xorQ(src[in:in+8], dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
copy(tmp1, dst[out:out+8])
t.encode(dst[out:out+8], dst[out:out+8])
xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
copy(tmp2, tmp1)
in += 8
out += 8
} }
tmp3 := make([]byte, 8)
copy(tmp3, src[in:])
xorQ(tmp3, dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
t.encode(dst[out:out+8], dst[out:out+8])
xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
return dst return dst
} }
@ -72,39 +58,31 @@ func (t *TEA) Decrypt(data []byte) []byte {
} }
dst := make([]byte, len(data)) dst := make([]byte, len(data))
copy(dst, data) copy(dst, data)
t.decode(dst[0:8], dst[0:8]) if cpu.LittleEndian {
tmp := make([]byte, 8) bswap.Swap64(dst)
copy(tmp, dst[0:8]) }
for in := 8; in < len(data); in += 8 {
xorQ(dst[in:in+8], tmp, dst[in:in+8]) var iv1, iv2, holder, tmp int64
t.decode(dst[in:in+8], dst[in:in+8]) var blocks []int64
xorQ(dst[in:in+8], data[in-8:in], dst[in:in+8]) dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
xorQ(dst[in:in+8], data[in-8:in], tmp) blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
blocksHeader.Data = dstHeader.Data
blocksHeader.Len = dstHeader.Len / 8
blocksHeader.Cap = blocksHeader.Len
for i, block := range blocks {
tmp = t.decode(block ^ iv2)
iv2 = tmp
holder = tmp ^ iv1
iv1 = block
blocks[i] = holder
}
if cpu.LittleEndian {
bswap.Swap64(dst)
} }
return dst[dst[0]&7+3 : len(data)-7] return dst[dst[0]&7+3 : len(data)-7]
} }
//go:nosplit
func unpack(data []byte) (v0, v1 uint32) {
v1 = uint32(data[7]) | uint32(data[6])<<8 | uint32(data[5])<<16 | uint32(data[4])<<24
v0 = uint32(data[3]) | uint32(data[2])<<8 | uint32(data[1])<<16 | uint32(data[0])<<24
return v0, v1
}
//go:nosplit
func repack(data []byte, v0, v1 uint32) {
_ = data[7] // early bounds check to guarantee safety of writes below
data[0] = byte(v0 >> 24)
data[1] = byte(v0 >> 16)
data[2] = byte(v0 >> 8)
data[3] = byte(v0)
data[4] = byte(v1 >> 24)
data[5] = byte(v1 >> 16)
data[6] = byte(v1 >> 8)
data[7] = byte(v1)
}
var sumTable = [0x10]uint32{ var sumTable = [0x10]uint32{
0x9e3779b9, 0x9e3779b9,
0x3c6ef372, 0x3c6ef372,
@ -125,24 +103,24 @@ var sumTable = [0x10]uint32{
} }
//go:nosplit //go:nosplit
func (t *TEA) encode(src, dst []byte) { func (t *TEA) encode(n int64) int64 {
v0, v1 := unpack(src) v0, v1 := uint32(n>>32), uint32(n)
for i := 0; i < 0x10; i++ { for i := 0; i < 0x10; i++ {
v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1]) v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3]) v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
} }
repack(dst, v0, v1) return int64(v0)<<32 | int64(v1)
} }
// 每次8字节 // 每次8字节
//go:nosplit //go:nosplit
func (t *TEA) decode(src, dst []byte) { func (t *TEA) decode(n int64) int64 {
v0, v1 := unpack(src) v0, v1 := uint32(n>>32), uint32(n)
for i := 0xf; i >= 0; i-- { for i := 0xf; i >= 0; i-- {
v1 -= ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3]) v1 -= ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
v0 -= ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1]) v0 -= ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
} }
repack(dst, v0, v1) return int64(v0)<<32 | int64(v1)
} }
//go:nosplit //go:nosplit

View File

@ -110,7 +110,11 @@ func BenchmarkTEAen(b *testing.B) {
benchEncrypt(b, data) benchEncrypt(b, data)
}) })
b.Run("4K", func(b *testing.B) { b.Run("4K", func(b *testing.B) {
data := make([]byte, 4096) data := make([]byte, 1024*4)
benchEncrypt(b, data)
})
b.Run("32K", func(b *testing.B) {
data := make([]byte, 1024*32)
benchEncrypt(b, data) benchEncrypt(b, data)
}) })
} }
@ -128,4 +132,8 @@ func BenchmarkTEAde(b *testing.B) {
data := make([]byte, 4096) data := make([]byte, 4096)
benchDecrypt(b, data) benchDecrypt(b, data)
}) })
b.Run("32K", func(b *testing.B) {
data := make([]byte, 1024*32)
benchDecrypt(b, data)
})
} }

1
go.mod
View File

@ -5,6 +5,7 @@ go 1.16
require ( require (
github.com/klauspost/compress v1.13.6 github.com/klauspost/compress v1.13.6
github.com/pkg/errors v0.9.1 github.com/pkg/errors v0.9.1
github.com/segmentio/asm v1.1.0
github.com/stretchr/testify v1.3.0 github.com/stretchr/testify v1.3.0
github.com/tidwall/gjson v1.11.0 github.com/tidwall/gjson v1.11.0
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f

4
go.sum
View File

@ -5,10 +5,14 @@ github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/segmentio/asm v1.1.0 h1:fkVr8k5J4sKoFjTGVD6r1yKvDKqmvrEh3K7iyVxgBs8=
github.com/segmentio/asm v1.1.0/go.mod h1:4EUJGaKsB8ImLUwOGORVsNd9vTRDeh44JGsY4aKp5I4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

View File

@ -0,0 +1,6 @@
//go:build armbe || arm64be || mips || mips64 || ppc || ppc64 || s390 || s390x || sparc || sparc64
// +build armbe arm64be mips mips64 ppc ppc64 s390 s390x sparc sparc64
package cpu
// LittleEndian is false in this file, which the build constraint above selects
// for the big-endian GOARCH values it lists (armbe, arm64be, mips, mips64, ppc,
// ppc64, s390, s390x, sparc, sparc64).
// NOTE(review): a GOARCH listed in neither this constraint nor the one on the
// little-endian counterpart gets no LittleEndian definition at all — confirm
// that every supported target is covered.
const LittleEndian = false

View File

@ -0,0 +1,6 @@
//go:build 386 || amd64 || arm || arm64 || mipsle || mips64le || ppc64le || riscv || riscv64 || wasm
// +build 386 amd64 arm arm64 mipsle mips64le ppc64le riscv riscv64 wasm
package cpu
// LittleEndian is true in this file, which the build constraint above selects
// for the little-endian GOARCH values it lists (386, amd64, arm, arm64, mipsle,
// mips64le, ppc64le, riscv, riscv64, wasm).
// NOTE(review): a GOARCH listed in neither this constraint nor the one on the
// big-endian counterpart gets no LittleEndian definition at all — confirm
// that every supported target is covered.
const LittleEndian = true