mirror of
https://github.com/Mrs4s/MiraiGo.git
synced 2025-05-04 19:17:38 +08:00
perf: speed up tea encrypt & decrypt
name         old time/op    new time/op    delta
TEAen/16-8   260ns ± 0%     245ns ± 0%     -5.83%  (p=0.008 n=5+5)
TEAen/256-8  1.86µs ± 0%    1.73µs ± 0%    -6.86%  (p=0.008 n=5+5)
TEAen/4K-8   27.8µs ± 0%    24.9µs ± 0%    -10.19%  (p=0.008 n=5+5)
TEAde/16-8   223ns ± 1%     215ns ± 1%     -3.24%  (p=0.008 n=5+5)
TEAde/256-8  1.88µs ± 0%    1.70µs ± 0%    -9.49%  (p=0.000 n=4+5)
TEAde/4K-8   28.1µs ± 1%    24.9µs ± 1%    -11.43%  (p=0.008 n=5+5)

name         old speed      new speed      delta
TEAen/16-8   61.6MB/s ± 0%  65.4MB/s ± 0%  +6.19%  (p=0.008 n=5+5)
TEAen/256-8  138MB/s ± 0%   148MB/s ± 0%   +7.35%  (p=0.008 n=5+5)
TEAen/4K-8   147MB/s ± 0%   164MB/s ± 0%   +11.34%  (p=0.008 n=5+5)
TEAde/16-8   144MB/s ± 1%   149MB/s ± 1%   +3.34%  (p=0.008 n=5+5)
TEAde/256-8  145MB/s ± 0%   160MB/s ± 0%   +10.47%  (p=0.016 n=4+5)
TEAde/4K-8   146MB/s ± 1%   165MB/s ± 1%   +12.90%  (p=0.008 n=5+5)
This commit is contained in:
parent
b85fc25cd5
commit
2a92b2755f
124
binary/tea.go
124
binary/tea.go
@ -3,7 +3,12 @@ package binary
|
|||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"reflect"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/segmentio/asm/bswap"
|
||||||
|
|
||||||
|
"github.com/Mrs4s/MiraiGo/internal/cpu"
|
||||||
)
|
)
|
||||||
|
|
||||||
func xorQ(a, b []byte, c []byte) { // MAGIC
|
func xorQ(a, b []byte, c []byte) { // MAGIC
|
||||||
@ -19,50 +24,31 @@ type TEA [4]uint32
|
|||||||
func (t *TEA) Encrypt(src []byte) (dst []byte) {
|
func (t *TEA) Encrypt(src []byte) (dst []byte) {
|
||||||
lens := len(src)
|
lens := len(src)
|
||||||
fill := 10 - (lens+1)%8
|
fill := 10 - (lens+1)%8
|
||||||
tmp1 := make([]byte, 8) // 非纯src的数据
|
|
||||||
tmp2 := make([]byte, 8)
|
|
||||||
dst = make([]byte, fill+lens+7)
|
dst = make([]byte, fill+lens+7)
|
||||||
// for i := 0; i < fill; i++ {
|
|
||||||
// dst[i] = ' '
|
|
||||||
// } // For test purpose
|
|
||||||
_, _ = rand.Read(dst[0:fill])
|
_, _ = rand.Read(dst[0:fill])
|
||||||
dst[0] = byte(fill-3) | 0xF8 // 存储pad长度
|
dst[0] = byte(fill-3) | 0xF8 // 存储pad长度
|
||||||
in := 0 // 位置
|
copy(dst[fill:], src)
|
||||||
// #1
|
if cpu.LittleEndian {
|
||||||
if fill < 8 {
|
bswap.Swap64(dst)
|
||||||
in = 8 - fill
|
|
||||||
copy(dst[fill:8], src[:in])
|
|
||||||
}
|
}
|
||||||
copy(tmp2, dst[0:8])
|
|
||||||
t.encode(dst[0:8], dst[0:8])
|
var iv1, iv2, holder int64
|
||||||
out := 8 // 位置
|
var blocks []int64
|
||||||
// #2
|
dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
|
||||||
if fill > 8 {
|
blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
|
||||||
copy(dst[fill:out+8], src[:16-fill])
|
blocksHeader.Data = dstHeader.Data
|
||||||
xorQ(dst[8:16], dst[0:8], dst[8:16]) // 与前一次结果xor
|
blocksHeader.Len = dstHeader.Len / 8
|
||||||
copy(tmp1, dst[8:16])
|
blocksHeader.Cap = blocksHeader.Len
|
||||||
t.encode(dst[8:16], dst[8:16])
|
for i, block := range blocks {
|
||||||
xorQ(dst[8:16], tmp2, dst[8:16]) // 与前一次数据xor
|
holder = block ^ iv1
|
||||||
copy(tmp2, tmp1)
|
iv1 = t.encode(holder)
|
||||||
in = 16 - fill
|
iv1 = iv1 ^ iv2
|
||||||
out = 16
|
iv2 = holder
|
||||||
|
blocks[i] = iv1
|
||||||
}
|
}
|
||||||
// #3+或#4+
|
if cpu.LittleEndian {
|
||||||
lens -= 8
|
bswap.Swap64(dst)
|
||||||
for in < lens {
|
|
||||||
xorQ(src[in:in+8], dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
|
|
||||||
copy(tmp1, dst[out:out+8])
|
|
||||||
t.encode(dst[out:out+8], dst[out:out+8])
|
|
||||||
xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
|
|
||||||
copy(tmp2, tmp1)
|
|
||||||
in += 8
|
|
||||||
out += 8
|
|
||||||
}
|
}
|
||||||
tmp3 := make([]byte, 8)
|
|
||||||
copy(tmp3, src[in:])
|
|
||||||
xorQ(tmp3, dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
|
|
||||||
t.encode(dst[out:out+8], dst[out:out+8])
|
|
||||||
xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
|
|
||||||
return dst
|
return dst
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,39 +58,31 @@ func (t *TEA) Decrypt(data []byte) []byte {
|
|||||||
}
|
}
|
||||||
dst := make([]byte, len(data))
|
dst := make([]byte, len(data))
|
||||||
copy(dst, data)
|
copy(dst, data)
|
||||||
t.decode(dst[0:8], dst[0:8])
|
if cpu.LittleEndian {
|
||||||
tmp := make([]byte, 8)
|
bswap.Swap64(dst)
|
||||||
copy(tmp, dst[0:8])
|
}
|
||||||
for in := 8; in < len(data); in += 8 {
|
|
||||||
xorQ(dst[in:in+8], tmp, dst[in:in+8])
|
var iv1, iv2, holder, tmp int64
|
||||||
t.decode(dst[in:in+8], dst[in:in+8])
|
var blocks []int64
|
||||||
xorQ(dst[in:in+8], data[in-8:in], dst[in:in+8])
|
dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
|
||||||
xorQ(dst[in:in+8], data[in-8:in], tmp)
|
blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
|
||||||
|
blocksHeader.Data = dstHeader.Data
|
||||||
|
blocksHeader.Len = dstHeader.Len / 8
|
||||||
|
blocksHeader.Cap = blocksHeader.Len
|
||||||
|
for i, block := range blocks {
|
||||||
|
tmp = t.decode(block ^ iv2)
|
||||||
|
iv2 = tmp
|
||||||
|
holder = tmp ^ iv1
|
||||||
|
iv1 = block
|
||||||
|
blocks[i] = holder
|
||||||
|
}
|
||||||
|
|
||||||
|
if cpu.LittleEndian {
|
||||||
|
bswap.Swap64(dst)
|
||||||
}
|
}
|
||||||
return dst[dst[0]&7+3 : len(data)-7]
|
return dst[dst[0]&7+3 : len(data)-7]
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:nosplit
|
|
||||||
func unpack(data []byte) (v0, v1 uint32) {
|
|
||||||
v1 = uint32(data[7]) | uint32(data[6])<<8 | uint32(data[5])<<16 | uint32(data[4])<<24
|
|
||||||
v0 = uint32(data[3]) | uint32(data[2])<<8 | uint32(data[1])<<16 | uint32(data[0])<<24
|
|
||||||
return v0, v1
|
|
||||||
}
|
|
||||||
|
|
||||||
//go:nosplit
|
|
||||||
func repack(data []byte, v0, v1 uint32) {
|
|
||||||
_ = data[7] // early bounds check to guarantee safety of writes below
|
|
||||||
data[0] = byte(v0 >> 24)
|
|
||||||
data[1] = byte(v0 >> 16)
|
|
||||||
data[2] = byte(v0 >> 8)
|
|
||||||
data[3] = byte(v0)
|
|
||||||
|
|
||||||
data[4] = byte(v1 >> 24)
|
|
||||||
data[5] = byte(v1 >> 16)
|
|
||||||
data[6] = byte(v1 >> 8)
|
|
||||||
data[7] = byte(v1)
|
|
||||||
}
|
|
||||||
|
|
||||||
var sumTable = [0x10]uint32{
|
var sumTable = [0x10]uint32{
|
||||||
0x9e3779b9,
|
0x9e3779b9,
|
||||||
0x3c6ef372,
|
0x3c6ef372,
|
||||||
@ -125,24 +103,24 @@ var sumTable = [0x10]uint32{
|
|||||||
}
|
}
|
||||||
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (t *TEA) encode(src, dst []byte) {
|
func (t *TEA) encode(n int64) int64 {
|
||||||
v0, v1 := unpack(src)
|
v0, v1 := uint32(n>>32), uint32(n)
|
||||||
for i := 0; i < 0x10; i++ {
|
for i := 0; i < 0x10; i++ {
|
||||||
v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
|
v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
|
||||||
v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
|
v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
|
||||||
}
|
}
|
||||||
repack(dst, v0, v1)
|
return int64(v0)<<32 | int64(v1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 每次8字节
|
// 每次8字节
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (t *TEA) decode(src, dst []byte) {
|
func (t *TEA) decode(n int64) int64 {
|
||||||
v0, v1 := unpack(src)
|
v0, v1 := uint32(n>>32), uint32(n)
|
||||||
for i := 0xf; i >= 0; i-- {
|
for i := 0xf; i >= 0; i-- {
|
||||||
v1 -= ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
|
v1 -= ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
|
||||||
v0 -= ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
|
v0 -= ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
|
||||||
}
|
}
|
||||||
repack(dst, v0, v1)
|
return int64(v0)<<32 | int64(v1)
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
|
@ -110,7 +110,11 @@ func BenchmarkTEAen(b *testing.B) {
|
|||||||
benchEncrypt(b, data)
|
benchEncrypt(b, data)
|
||||||
})
|
})
|
||||||
b.Run("4K", func(b *testing.B) {
|
b.Run("4K", func(b *testing.B) {
|
||||||
data := make([]byte, 4096)
|
data := make([]byte, 1024*4)
|
||||||
|
benchEncrypt(b, data)
|
||||||
|
})
|
||||||
|
b.Run("32K", func(b *testing.B) {
|
||||||
|
data := make([]byte, 1024*32)
|
||||||
benchEncrypt(b, data)
|
benchEncrypt(b, data)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -128,4 +132,8 @@ func BenchmarkTEAde(b *testing.B) {
|
|||||||
data := make([]byte, 4096)
|
data := make([]byte, 4096)
|
||||||
benchDecrypt(b, data)
|
benchDecrypt(b, data)
|
||||||
})
|
})
|
||||||
|
b.Run("32K", func(b *testing.B) {
|
||||||
|
data := make([]byte, 1024*32)
|
||||||
|
benchDecrypt(b, data)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
1
go.mod
1
go.mod
@ -5,6 +5,7 @@ go 1.16
|
|||||||
require (
|
require (
|
||||||
github.com/klauspost/compress v1.13.6
|
github.com/klauspost/compress v1.13.6
|
||||||
github.com/pkg/errors v0.9.1
|
github.com/pkg/errors v0.9.1
|
||||||
|
github.com/segmentio/asm v1.1.0
|
||||||
github.com/stretchr/testify v1.3.0
|
github.com/stretchr/testify v1.3.0
|
||||||
github.com/tidwall/gjson v1.11.0
|
github.com/tidwall/gjson v1.11.0
|
||||||
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f
|
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f
|
||||||
|
4
go.sum
4
go.sum
@ -5,10 +5,14 @@ github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
|||||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
|
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
|
||||||
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
||||||
|
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
|
||||||
|
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/segmentio/asm v1.1.0 h1:fkVr8k5J4sKoFjTGVD6r1yKvDKqmvrEh3K7iyVxgBs8=
|
||||||
|
github.com/segmentio/asm v1.1.0/go.mod h1:4EUJGaKsB8ImLUwOGORVsNd9vTRDeh44JGsY4aKp5I4=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
|
6
internal/cpu/big_endian.go
Normal file
6
internal/cpu/big_endian.go
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
//go:build armbe || arm64be || mips || mips64 || ppc || ppc64 || s390 || s390x || sparc || sparc64
|
||||||
|
// +build armbe arm64be mips mips64 ppc ppc64 s390 s390x sparc sparc64
|
||||||
|
|
||||||
|
package cpu
|
||||||
|
|
||||||
|
const LittleEndian = false
|
6
internal/cpu/little_endian.go
Normal file
6
internal/cpu/little_endian.go
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
//go:build 386 || amd64 || arm || arm64 || mipsle || mips64le || ppc64le || riscv || riscv64 || wasm
|
||||||
|
// +build 386 amd64 arm arm64 mipsle mips64le ppc64le riscv riscv64 wasm
|
||||||
|
|
||||||
|
package cpu
|
||||||
|
|
||||||
|
const LittleEndian = true
|
Loading…
x
Reference in New Issue
Block a user