diff --git a/binary/tea.go b/binary/tea.go
index 218aebc5..058a6c57 100644
--- a/binary/tea.go
+++ b/binary/tea.go
@@ -3,7 +3,12 @@ package binary
 import (
 	"encoding/binary"
 	"math/rand"
+	"reflect"
 	"unsafe"
+
+	"github.com/segmentio/asm/bswap"
+
+	"github.com/Mrs4s/MiraiGo/internal/cpu"
 )
 
 func xorQ(a, b []byte, c []byte) { // MAGIC
@@ -19,50 +24,31 @@ type TEA [4]uint32
 func (t *TEA) Encrypt(src []byte) (dst []byte) {
 	lens := len(src)
 	fill := 10 - (lens+1)%8
-	tmp1 := make([]byte, 8) // 非纯src的数据
-	tmp2 := make([]byte, 8)
 	dst = make([]byte, fill+lens+7)
-	// for i := 0; i < fill; i++ {
-	//     dst[i] = ' '
-	// } // For test purpose
 	_, _ = rand.Read(dst[0:fill])
 	dst[0] = byte(fill-3) | 0xF8 // 存储pad长度
-	in := 0 // 位置
-	// #1
-	if fill < 8 {
-		in = 8 - fill
-		copy(dst[fill:8], src[:in])
+	copy(dst[fill:], src)
+	if cpu.LittleEndian {
+		bswap.Swap64(dst)
 	}
-	copy(tmp2, dst[0:8])
-	t.encode(dst[0:8], dst[0:8])
-	out := 8 // 位置
-	// #2
-	if fill > 8 {
-		copy(dst[fill:out+8], src[:16-fill])
-		xorQ(dst[8:16], dst[0:8], dst[8:16]) // 与前一次结果xor
-		copy(tmp1, dst[8:16])
-		t.encode(dst[8:16], dst[8:16])
-		xorQ(dst[8:16], tmp2, dst[8:16]) // 与前一次数据xor
-		copy(tmp2, tmp1)
-		in = 16 - fill
-		out = 16
+
+	var iv1, iv2, holder int64
+	var blocks []int64
+	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
+	blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
+	blocksHeader.Data = dstHeader.Data
+	blocksHeader.Len = dstHeader.Len / 8
+	blocksHeader.Cap = blocksHeader.Len
+	for i, block := range blocks {
+		holder = block ^ iv1
+		iv1 = t.encode(holder)
+		iv1 = iv1 ^ iv2
+		iv2 = holder
+		blocks[i] = iv1
 	}
-	// #3+或#4+
-	lens -= 8
-	for in < lens {
-		xorQ(src[in:in+8], dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
-		copy(tmp1, dst[out:out+8])
-		t.encode(dst[out:out+8], dst[out:out+8])
-		xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
-		copy(tmp2, tmp1)
-		in += 8
-		out += 8
+	if cpu.LittleEndian {
+		bswap.Swap64(dst)
 	}
-	tmp3 := make([]byte, 8)
-	copy(tmp3, src[in:])
-	xorQ(tmp3, dst[out-8:out], dst[out:out+8]) // 与前一次结果xor
-	t.encode(dst[out:out+8], dst[out:out+8])
-	xorQ(dst[out:out+8], tmp2, dst[out:out+8]) // 与前一次数据xor
 	return dst
 }
 
@@ -72,39 +58,31 @@ func (t *TEA) Decrypt(data []byte) []byte {
 	}
 	dst := make([]byte, len(data))
 	copy(dst, data)
-	t.decode(dst[0:8], dst[0:8])
-	tmp := make([]byte, 8)
-	copy(tmp, dst[0:8])
-	for in := 8; in < len(data); in += 8 {
-		xorQ(dst[in:in+8], tmp, dst[in:in+8])
-		t.decode(dst[in:in+8], dst[in:in+8])
-		xorQ(dst[in:in+8], data[in-8:in], dst[in:in+8])
-		xorQ(dst[in:in+8], data[in-8:in], tmp)
+	if cpu.LittleEndian {
+		bswap.Swap64(dst)
+	}
+
+	var iv1, iv2, holder, tmp int64
+	var blocks []int64
+	dstHeader := (*reflect.SliceHeader)(unsafe.Pointer(&dst))
+	blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks))
+	blocksHeader.Data = dstHeader.Data
+	blocksHeader.Len = dstHeader.Len / 8
+	blocksHeader.Cap = blocksHeader.Len
+	for i, block := range blocks {
+		tmp = t.decode(block ^ iv2)
+		iv2 = tmp
+		holder = tmp ^ iv1
+		iv1 = block
+		blocks[i] = holder
+	}
+
+	if cpu.LittleEndian {
+		bswap.Swap64(dst)
 	}
 	return dst[dst[0]&7+3 : len(data)-7]
 }
 
-//go:nosplit
-func unpack(data []byte) (v0, v1 uint32) {
-	v1 = uint32(data[7]) | uint32(data[6])<<8 | uint32(data[5])<<16 | uint32(data[4])<<24
-	v0 = uint32(data[3]) | uint32(data[2])<<8 | uint32(data[1])<<16 | uint32(data[0])<<24
-	return v0, v1
-}
-
-//go:nosplit
-func repack(data []byte, v0, v1 uint32) {
-	_ = data[7] // early bounds check to guarantee safety of writes below
-	data[0] = byte(v0 >> 24)
-	data[1] = byte(v0 >> 16)
-	data[2] = byte(v0 >> 8)
-	data[3] = byte(v0)
-
-	data[4] = byte(v1 >> 24)
-	data[5] = byte(v1 >> 16)
-	data[6] = byte(v1 >> 8)
-	data[7] = byte(v1)
-}
-
 var sumTable = [0x10]uint32{
 	0x9e3779b9,
 	0x3c6ef372,
@@ -125,24 +103,24 @@ var sumTable = [0x10]uint32{
 }
 
 //go:nosplit
-func (t *TEA) encode(src, dst []byte) {
-	v0, v1 := unpack(src)
+func (t *TEA) encode(n int64) int64 {
+	v0, v1 := uint32(n>>32), uint32(n)
 	for i := 0; i < 0x10; i++ {
 		v0 += ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
 		v1 += ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
 	}
-	repack(dst, v0, v1)
+	return int64(v0)<<32 | int64(v1)
 }
 
 // 每次8字节
 //go:nosplit
-func (t *TEA) decode(src, dst []byte) {
-	v0, v1 := unpack(src)
+func (t *TEA) decode(n int64) int64 {
+	v0, v1 := uint32(n>>32), uint32(n)
 	for i := 0xf; i >= 0; i-- {
 		v1 -= ((v0 << 4) + t[2]) ^ (v0 + sumTable[i]) ^ ((v0 >> 5) + t[3])
 		v0 -= ((v1 << 4) + t[0]) ^ (v1 + sumTable[i]) ^ ((v1 >> 5) + t[1])
 	}
-	repack(dst, v0, v1)
+	return int64(v0)<<32 | int64(v1)
 }
 
 //go:nosplit
diff --git a/binary/tea_test.go b/binary/tea_test.go
index c0c9d841..b455b8c7 100644
--- a/binary/tea_test.go
+++ b/binary/tea_test.go
@@ -110,7 +110,11 @@ func BenchmarkTEAen(b *testing.B) {
 		benchEncrypt(b, data)
 	})
 	b.Run("4K", func(b *testing.B) {
-		data := make([]byte, 4096)
+		data := make([]byte, 1024*4)
+		benchEncrypt(b, data)
+	})
+	b.Run("32K", func(b *testing.B) {
+		data := make([]byte, 1024*32)
 		benchEncrypt(b, data)
 	})
 }
@@ -128,4 +132,8 @@ func BenchmarkTEAde(b *testing.B) {
 		data := make([]byte, 4096)
 		benchDecrypt(b, data)
 	})
+	b.Run("32K", func(b *testing.B) {
+		data := make([]byte, 1024*32)
+		benchDecrypt(b, data)
+	})
 }
diff --git a/go.mod b/go.mod
index 9d01057f..7bacd38b 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.16
 require (
 	github.com/klauspost/compress v1.13.6
 	github.com/pkg/errors v0.9.1
+	github.com/segmentio/asm v1.1.0
 	github.com/stretchr/testify v1.3.0
 	github.com/tidwall/gjson v1.11.0
 	golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f
diff --git a/go.sum b/go.sum
index ae8bb42b..5cbf4f7c 100644
--- a/go.sum
+++ b/go.sum
@@ -5,10 +5,14 @@ github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
 github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
+github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/segmentio/asm v1.1.0 h1:fkVr8k5J4sKoFjTGVD6r1yKvDKqmvrEh3K7iyVxgBs8=
+github.com/segmentio/asm v1.1.0/go.mod h1:4EUJGaKsB8ImLUwOGORVsNd9vTRDeh44JGsY4aKp5I4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
diff --git a/internal/cpu/big_endian.go b/internal/cpu/big_endian.go
new file mode 100644
index 00000000..f29b953d
--- /dev/null
+++ b/internal/cpu/big_endian.go
@@ -0,0 +1,7 @@
+//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
+// +build armbe arm64be m68k mips mips64 mips64p32 ppc ppc64 s390 s390x shbe sparc sparc64
+
+package cpu
+
+// LittleEndian is false because this file is only built for the big-endian GOARCH values listed in its build constraint.
+const LittleEndian = false
diff --git a/internal/cpu/little_endian.go b/internal/cpu/little_endian.go
new file mode 100644
index 00000000..18d47df1
--- /dev/null
+++ b/internal/cpu/little_endian.go
@@ -0,0 +1,7 @@
+//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
+// +build 386 amd64 amd64p32 alpha arm arm64 loong64 mipsle mips64le mips64p32le nios2 ppc64le riscv riscv64 sh wasm
+
+package cpu
+
+// LittleEndian is true because this file is only built for the little-endian GOARCH values listed in its build constraint.
+const LittleEndian = true