1
0
mirror of https://github.com/Mrs4s/go-cqhttp.git synced 2025-05-07 12:43:31 +08:00

db/leveldb: impl index read/write drop encoding/gob (#1370)

This has two benefits:
 * shrinks the go-cqhttp binary size by about 200KiB
 * shrinks the database file from 2.8M to 1.56M compared with the v2 database

A tool to migrate a v2 database is also provided:
https://github.com/RomiChan/gocq-leveldb-migrate
This commit is contained in:
wdvxdr1123 2022-02-15 22:24:27 +08:00 committed by GitHub
parent f2e26d0e13
commit 9054d4cee8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 687 additions and 83 deletions

25
db/leveldb/const.go Normal file
View File

@ -0,0 +1,25 @@
package leveldb
// dataVersion is the format version written at the start of every encoded
// value by writer.bytes and checked by newReader before decoding.
const dataVersion = 1
// Message kind flags. The Insert* methods write one of them as the first
// value of the index data and the Get* methods switch on it when decoding.
const (
group = 0x0
private = 0x1
guildChannel = 0x2
)
// coder is the one-byte tag that identifies the type of an encoded value.
type coder byte
// The numeric values below are written into the stored data, so the order of
// this list must stay as it is.
const (
coderNil coder = iota
coderInt
coderUint
coderInt32
coderUint32
coderInt64
coderUint64
coderString
coderMSG // global.MSG
coderArrayMSG // []global.MSG
coderStruct // struct{}
)

177
db/leveldb/database_gen.go Normal file
View File

@ -0,0 +1,177 @@
// Code generated by mkrw.go; DO NOT EDIT.
package leveldb
import "github.com/Mrs4s/go-cqhttp/db"
// writeStoredGroupMessage encodes x into the index data part; a nil x is written as coderNil.
func (w *writer) writeStoredGroupMessage(x *db.StoredGroupMessage) {
if x == nil {
w.nil()
return
}
w.coder(coderStruct)
w.string(x.ID)
w.int32(x.GlobalID)
w.writeStoredMessageAttribute(x.Attribute)
w.string(x.SubType)
w.writeQuotedInfo(x.QuotedInfo)
w.int64(x.GroupCode)
w.string(x.AnonymousID)
w.arrayMsg(x.Content)
}
// readStoredGroupMessage decodes a value written by writeStoredGroupMessage; it returns nil when coderNil was stored.
func (r *reader) readStoredGroupMessage() *db.StoredGroupMessage {
coder := r.coder()
if coder == coderNil {
return nil
}
x := &db.StoredGroupMessage{}
x.ID = r.string()
x.GlobalID = r.int32()
x.Attribute = r.readStoredMessageAttribute()
x.SubType = r.string()
x.QuotedInfo = r.readQuotedInfo()
x.GroupCode = r.int64()
x.AnonymousID = r.string()
x.Content = r.arrayMsg()
return x
}
// writeStoredPrivateMessage encodes x into the index data part; a nil x is written as coderNil.
func (w *writer) writeStoredPrivateMessage(x *db.StoredPrivateMessage) {
if x == nil {
w.nil()
return
}
w.coder(coderStruct)
w.string(x.ID)
w.int32(x.GlobalID)
w.writeStoredMessageAttribute(x.Attribute)
w.string(x.SubType)
w.writeQuotedInfo(x.QuotedInfo)
w.int64(x.SessionUin)
w.int64(x.TargetUin)
w.arrayMsg(x.Content)
}
// readStoredPrivateMessage decodes a value written by writeStoredPrivateMessage; it returns nil when coderNil was stored.
func (r *reader) readStoredPrivateMessage() *db.StoredPrivateMessage {
coder := r.coder()
if coder == coderNil {
return nil
}
x := &db.StoredPrivateMessage{}
x.ID = r.string()
x.GlobalID = r.int32()
x.Attribute = r.readStoredMessageAttribute()
x.SubType = r.string()
x.QuotedInfo = r.readQuotedInfo()
x.SessionUin = r.int64()
x.TargetUin = r.int64()
x.Content = r.arrayMsg()
return x
}
// writeStoredGuildChannelMessage encodes x into the index data part; a nil x is written as coderNil.
func (w *writer) writeStoredGuildChannelMessage(x *db.StoredGuildChannelMessage) {
if x == nil {
w.nil()
return
}
w.coder(coderStruct)
w.string(x.ID)
w.writeStoredGuildMessageAttribute(x.Attribute)
w.uint64(x.GuildID)
w.uint64(x.ChannelID)
w.writeQuotedInfo(x.QuotedInfo)
w.arrayMsg(x.Content)
}
// readStoredGuildChannelMessage decodes a value written by writeStoredGuildChannelMessage; it returns nil when coderNil was stored.
func (r *reader) readStoredGuildChannelMessage() *db.StoredGuildChannelMessage {
coder := r.coder()
if coder == coderNil {
return nil
}
x := &db.StoredGuildChannelMessage{}
x.ID = r.string()
x.Attribute = r.readStoredGuildMessageAttribute()
x.GuildID = r.uint64()
x.ChannelID = r.uint64()
x.QuotedInfo = r.readQuotedInfo()
x.Content = r.arrayMsg()
return x
}
// writeStoredMessageAttribute encodes x into the index data part; a nil x is written as coderNil.
func (w *writer) writeStoredMessageAttribute(x *db.StoredMessageAttribute) {
if x == nil {
w.nil()
return
}
w.coder(coderStruct)
w.int32(x.MessageSeq)
w.int32(x.InternalID)
w.int64(x.SenderUin)
w.string(x.SenderName)
w.int64(x.Timestamp)
}
// readStoredMessageAttribute decodes a value written by writeStoredMessageAttribute; it returns nil when coderNil was stored.
func (r *reader) readStoredMessageAttribute() *db.StoredMessageAttribute {
coder := r.coder()
if coder == coderNil {
return nil
}
x := &db.StoredMessageAttribute{}
x.MessageSeq = r.int32()
x.InternalID = r.int32()
x.SenderUin = r.int64()
x.SenderName = r.string()
x.Timestamp = r.int64()
return x
}
// writeStoredGuildMessageAttribute encodes x into the index data part; a nil x is written as coderNil.
func (w *writer) writeStoredGuildMessageAttribute(x *db.StoredGuildMessageAttribute) {
if x == nil {
w.nil()
return
}
w.coder(coderStruct)
w.uint64(x.MessageSeq)
w.uint64(x.InternalID)
w.uint64(x.SenderTinyID)
w.string(x.SenderName)
w.int64(x.Timestamp)
}
// readStoredGuildMessageAttribute decodes a value written by writeStoredGuildMessageAttribute; it returns nil when coderNil was stored.
func (r *reader) readStoredGuildMessageAttribute() *db.StoredGuildMessageAttribute {
coder := r.coder()
if coder == coderNil {
return nil
}
x := &db.StoredGuildMessageAttribute{}
x.MessageSeq = r.uint64()
x.InternalID = r.uint64()
x.SenderTinyID = r.uint64()
x.SenderName = r.string()
x.Timestamp = r.int64()
return x
}
// writeQuotedInfo encodes x into the index data part; a nil x is written as coderNil.
func (w *writer) writeQuotedInfo(x *db.QuotedInfo) {
if x == nil {
w.nil()
return
}
w.coder(coderStruct)
w.string(x.PrevID)
w.int32(x.PrevGlobalID)
w.arrayMsg(x.QuotedContent)
}
// readQuotedInfo decodes a value written by writeQuotedInfo; it returns nil when coderNil was stored.
func (r *reader) readQuotedInfo() *db.QuotedInfo {
coder := r.coder()
if coder == coderNil {
return nil
}
x := &db.QuotedInfo{}
x.PrevID = r.string()
x.PrevGlobalID = r.int32()
x.QuotedContent = r.arrayMsg()
return x
}

View File

@ -1,54 +1,40 @@
package leveldb
import (
"bytes"
"encoding/gob"
"path"
"github.com/Mrs4s/MiraiGo/utils"
"github.com/Mrs4s/MiraiGo/binary"
"github.com/Mrs4s/MiraiGo/utils"
"github.com/pkg/errors"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/opt"
"gopkg.in/yaml.v3"
"github.com/Mrs4s/go-cqhttp/db"
"github.com/Mrs4s/go-cqhttp/global"
"github.com/Mrs4s/go-cqhttp/modules/config"
)
// The leveldb-backed message store; its only field is the *leveldb.DB handle.
type LevelDBImpl struct {
type database struct {
db *leveldb.DB
}
// Message kind flags, written as the first byte of a stored value.
const (
group byte = 0x0
private byte = 0x1
guildChannel byte = 0x2
)
// config holds the leveldb-related settings.
type config struct {
Enable bool `yaml:"enable"`
}
// init registers the "leveldb" backend with the db package. The registered
// factory decodes the YAML node into the backend config and returns nil
// unless Enable is set.
func init() {
gob.Register(db.StoredMessageAttribute{})
gob.Register(db.StoredGuildMessageAttribute{})
gob.Register(db.QuotedInfo{})
gob.Register(global.MSG{})
gob.Register(db.StoredGroupMessage{})
gob.Register(db.StoredPrivateMessage{})
gob.Register(db.StoredGuildChannelMessage{})
db.Register("leveldb", func(node yaml.Node) db.Database {
conf := new(config.LevelDBConfig)
conf := new(config)
// A decode error is discarded; conf then keeps whatever was decoded.
_ = node.Decode(conf)
if !conf.Enable {
return nil
}
return &LevelDBImpl{}
return &database{}
})
}
func (ldb *LevelDBImpl) Open() error {
p := path.Join("data", "leveldb-v2")
func (ldb *database) Open() error {
p := path.Join("data", "leveldb-v3")
d, err := leveldb.OpenFile(p, &opt.Options{
WriteBuffer: 32 * opt.KiB,
})
@ -59,31 +45,31 @@ func (ldb *LevelDBImpl) Open() error {
return nil
}
func (ldb *LevelDBImpl) GetMessageByGlobalID(id int32) (db.StoredMessage, error) {
func (ldb *database) GetMessageByGlobalID(id int32) (_ db.StoredMessage, err error) {
v, err := ldb.db.Get(binary.ToBytes(id), nil)
if err != nil {
if err != nil || len(v) == 0 {
return nil, errors.Wrap(err, "get value error")
}
r := binary.NewReader(v)
switch r.ReadByte() {
defer func() {
if r := recover(); r != nil {
err = errors.Errorf("%v", r)
}
}()
r, err := newReader(utils.B2S(v))
if err != nil {
return nil, err
}
switch r.uvarint() {
case group:
g := &db.StoredGroupMessage{}
if err = gob.NewDecoder(bytes.NewReader(r.ReadAvailable())).Decode(g); err != nil {
return nil, errors.Wrap(err, "decode message error")
}
return g, nil
return r.readStoredGroupMessage(), nil
case private:
p := &db.StoredPrivateMessage{}
if err = gob.NewDecoder(bytes.NewReader(r.ReadAvailable())).Decode(p); err != nil {
return nil, errors.Wrap(err, "decode message error")
}
return p, nil
return r.readStoredPrivateMessage(), nil
default:
return nil, errors.New("unknown message flag")
}
}
func (ldb *LevelDBImpl) GetGroupMessageByGlobalID(id int32) (*db.StoredGroupMessage, error) {
func (ldb *database) GetGroupMessageByGlobalID(id int32) (*db.StoredGroupMessage, error) {
i, err := ldb.GetMessageByGlobalID(id)
if err != nil {
return nil, err
@ -95,7 +81,7 @@ func (ldb *LevelDBImpl) GetGroupMessageByGlobalID(id int32) (*db.StoredGroupMess
return g, nil
}
func (ldb *LevelDBImpl) GetPrivateMessageByGlobalID(id int32) (*db.StoredPrivateMessage, error) {
func (ldb *database) GetPrivateMessageByGlobalID(id int32) (*db.StoredPrivateMessage, error) {
i, err := ldb.GetMessageByGlobalID(id)
if err != nil {
return nil, err
@ -107,59 +93,48 @@ func (ldb *LevelDBImpl) GetPrivateMessageByGlobalID(id int32) (*db.StoredPrivate
return p, nil
}
func (ldb *LevelDBImpl) GetGuildChannelMessageByID(id string) (*db.StoredGuildChannelMessage, error) {
func (ldb *database) GetGuildChannelMessageByID(id string) (*db.StoredGuildChannelMessage, error) {
v, err := ldb.db.Get([]byte(id), nil)
if err != nil {
return nil, errors.Wrap(err, "get value error")
}
r := binary.NewReader(v)
switch r.ReadByte() {
case guildChannel:
g := &db.StoredGuildChannelMessage{}
if err = gob.NewDecoder(bytes.NewReader(r.ReadAvailable())).Decode(g); err != nil {
return nil, errors.Wrap(err, "decode message error")
defer func() {
if r := recover(); r != nil {
err = errors.Errorf("%v", r)
}
return g, nil
}()
r, err := newReader(utils.B2S(v))
if err != nil {
return nil, err
}
switch r.uvarint() {
case guildChannel:
return r.readStoredGuildChannelMessage(), nil
default:
return nil, errors.New("unknown message flag")
}
}
func (ldb *LevelDBImpl) InsertGroupMessage(msg *db.StoredGroupMessage) error {
buf := global.NewBuffer()
defer global.PutBuffer(buf)
if err := gob.NewEncoder(buf).Encode(msg); err != nil {
return errors.Wrap(err, "encode message error")
}
err := ldb.db.Put(binary.ToBytes(msg.GlobalID), binary.NewWriterF(func(w *binary.Writer) {
w.WriteByte(group)
w.Write(buf.Bytes())
}), nil)
func (ldb *database) InsertGroupMessage(msg *db.StoredGroupMessage) error {
w := newWriter()
w.uvarint(group)
w.writeStoredGroupMessage(msg)
err := ldb.db.Put(binary.ToBytes(msg.GlobalID), w.bytes(), nil)
return errors.Wrap(err, "put data error")
}
func (ldb *LevelDBImpl) InsertPrivateMessage(msg *db.StoredPrivateMessage) error {
buf := global.NewBuffer()
defer global.PutBuffer(buf)
if err := gob.NewEncoder(buf).Encode(msg); err != nil {
return errors.Wrap(err, "encode message error")
}
err := ldb.db.Put(binary.ToBytes(msg.GlobalID), binary.NewWriterF(func(w *binary.Writer) {
w.WriteByte(private)
w.Write(buf.Bytes())
}), nil)
func (ldb *database) InsertPrivateMessage(msg *db.StoredPrivateMessage) error {
w := newWriter()
w.uvarint(private)
w.writeStoredPrivateMessage(msg)
err := ldb.db.Put(binary.ToBytes(msg.GlobalID), w.bytes(), nil)
return errors.Wrap(err, "put data error")
}
func (ldb *LevelDBImpl) InsertGuildChannelMessage(msg *db.StoredGuildChannelMessage) error {
buf := global.NewBuffer()
defer global.PutBuffer(buf)
if err := gob.NewEncoder(buf).Encode(msg); err != nil {
return errors.Wrap(err, "encode message error")
}
err := ldb.db.Put(utils.S2B(msg.ID), binary.NewWriterF(func(w *binary.Writer) {
w.WriteByte(guildChannel)
w.Write(buf.Bytes())
}), nil)
func (ldb *database) InsertGuildChannelMessage(msg *db.StoredGuildChannelMessage) error {
w := newWriter()
w.uvarint(guildChannel)
w.writeStoredGuildChannelMessage(msg)
err := ldb.db.Put(utils.S2B(msg.ID), w.bytes(), nil)
return errors.Wrap(err, "put data error")
}

129
db/leveldb/mkrw.go Normal file
View File

@ -0,0 +1,129 @@
//go:build ignore
package main
import (
"bytes"
"fmt"
"go/ast"
"go/format"
"go/parser"
"go/token"
"os"
)
// output accumulates the generated source until main formats and writes it.
var output bytes.Buffer

// fprintf appends the formatted text to output.
func fprintf(format string, args ...interface{}) {
	text := fmt.Sprintf(format, args...)
	// bytes.Buffer.WriteString always returns a nil error.
	_, _ = output.WriteString(text)
}
// main parses ../database.go, emits a write/read method pair for every
// exported struct type that has fields, and writes the gofmt-formatted result
// to database_gen.go in the current directory.
//
// Any failure (parse, format, or write) aborts the run with a panic so that a
// broken or stale file is never left behind unnoticed.
func main() {
	f, err := parser.ParseFile(token.NewFileSet(), "./../database.go", nil, 0)
	if err != nil {
		// f may be nil or incomplete here; generating from it would be wrong.
		panic(err)
	}
	fprintf("// Code generated by mkrw.go; DO NOT EDIT.\n\n")
	fprintf("package leveldb\n\n")
	fprintf("import \"github.com/Mrs4s/go-cqhttp/db\"\n\n")
	ast.Inspect(f, func(node ast.Node) bool {
		switch node := node.(type) {
		case *ast.FuncDecl:
			// Types declared inside functions are not part of the schema.
			return false
		case *ast.TypeSpec:
			if !node.Name.IsExported() {
				return false
			}
			x, ok := node.Type.(*ast.StructType)
			if !ok {
				return false
			}
			if x.Fields != nil && x.Fields.List != nil {
				mkWrite(node)
				mkRead(node)
			}
		}
		return true
	})
	out, err := format.Source(output.Bytes())
	if err != nil {
		// Dump the unformatted source so the syntax error can be located.
		fmt.Println(output.String())
		panic(err)
	}
	if err := os.WriteFile("database_gen.go", out, 0o644); err != nil {
		panic(err)
	}
}
// typeName renders a type expression as source text. It supports identifiers,
// slices of supported types, and package-qualified names; fixed-size arrays
// and every other expression kind cause a panic.
func typeName(typ ast.Expr) string {
	if id, ok := typ.(*ast.Ident); ok {
		return id.Name
	}
	if arr, ok := typ.(*ast.ArrayType); ok {
		// A non-nil Len means a fixed-size array rather than a slice.
		if arr.Len != nil {
			panic("unexpected array type")
		}
		return "[]" + typeName(arr.Elt)
	}
	if sel, ok := typ.(*ast.SelectorExpr); ok {
		return typeName(sel.X) + "." + sel.Sel.Name
	}
	panic("unexpected type")
}
// mkWrite emits the write<Type> method for the struct declared by node.
//
// The generated method writes coderNil for a nil pointer; otherwise it writes
// coderStruct followed by every named field in declaration order. A field
// whose type is a plain identifier maps to the writer method of the same
// name, a []global.MSG field maps to arrayMsg, and a pointer field maps to
// the write method of the pointed-to type. Any other field type panics, so a
// field can never be dropped from the encoding silently.
func mkWrite(node *ast.TypeSpec) {
	typename := node.Name.String()
	structType := node.Type.(*ast.StructType)
	fprintf("// write%s encodes x into the index data part; a nil x is written as coderNil.\n", typename)
	fprintf("func (w *writer) write%s(x *db.%s) {\n", typename, typename)
	fprintf("if x == nil {\n")
	fprintf("w.nil()\n")
	fprintf("return\n")
	fprintf("}\n")
	fprintf("w.coder(coderStruct)\n")
	for _, field := range structType.Fields.List {
		switch typ := field.Type.(type) {
		case *ast.Ident:
			for _, name := range field.Names {
				fprintf("w.%s(x.%s)\n", typ.Name, name.Name)
			}
		case *ast.ArrayType:
			if typeName(typ) != "[]global.MSG" {
				panic("unexpected array type")
			}
			for _, name := range field.Names {
				fprintf("w.arrayMsg(x.%s)\n", name.Name)
			}
		case *ast.StarExpr:
			for _, name := range field.Names {
				fprintf("w.write%s(x.%s)\n", typeName(typ.X), name.Name)
			}
		default:
			panic(fmt.Sprintf("unsupported field type %T in %s", typ, typename))
		}
	}
	fprintf("}\n\n")
}
// mkRead emits the read<Type> method for the struct declared by node.
//
// The generated method reads one coder and returns nil if it is coderNil;
// otherwise it reads every named field in declaration order, mirroring what
// mkWrite emits. Any field type other than a plain identifier, []global.MSG,
// or a pointer panics, so a field can never be dropped from the decoding
// silently.
func mkRead(node *ast.TypeSpec) {
	typename := node.Name.String()
	structType := node.Type.(*ast.StructType)
	fprintf("// read%s decodes a value written by write%s; it returns nil when coderNil was stored.\n", typename, typename)
	fprintf("func (r *reader) read%s() *db.%s {\n", typename, typename)
	fprintf("coder := r.coder()\n")
	fprintf("if coder == coderNil {\n")
	fprintf("return nil\n")
	fprintf("}\n")
	fprintf("x := &db.%s{}\n", typename)
	for _, field := range structType.Fields.List {
		switch typ := field.Type.(type) {
		case *ast.Ident:
			for _, name := range field.Names {
				fprintf("x.%s = r.%s()\n", name.Name, typ.Name)
			}
		case *ast.ArrayType:
			if typeName(typ) != "[]global.MSG" {
				panic("unexpected array type")
			}
			for _, name := range field.Names {
				fprintf("x.%s = r.arrayMsg()\n", name.Name)
			}
		case *ast.StarExpr:
			for _, name := range field.Names {
				fprintf("x.%s = r.read%s()\n", name.Name, typeName(typ.X))
			}
		default:
			panic(fmt.Sprintf("unsupported field type %T in %s", typ, typename))
		}
	}
	fprintf("return x\n")
	fprintf("}\n\n")
}

154
db/leveldb/reader.go Normal file
View File

@ -0,0 +1,154 @@
package leveldb
import (
"encoding/binary"
"io"
"strconv"
"strings"
"github.com/pkg/errors"
"github.com/Mrs4s/go-cqhttp/global"
)
// intReader is a strings.Reader that also keeps the string it reads from, so
// callers can slice the underlying data directly.
type intReader struct {
	data string
	*strings.Reader
}

// newIntReader returns an intReader positioned at the start of s.
func newIntReader(s string) intReader {
	var r intReader
	r.data = s
	r.Reader = strings.NewReader(s)
	return r
}

// varint reads one signed varint. A read error is discarded and the value
// that binary.ReadVarint returned alongside it is used as is.
func (r *intReader) varint() int64 {
	v, _ := binary.ReadVarint(r.Reader)
	return v
}

// uvarint reads one unsigned varint. A read error is discarded and the value
// that binary.ReadUvarint returned alongside it is used as is.
func (r *intReader) uvarint() uint64 {
	v, _ := binary.ReadUvarint(r.Reader)
	return v
}
// reader implements the index read.
// The data format is the same as the writer's.
type reader struct {
	data        intReader         // index data part
	strings     intReader         // string data part
	stringIndex map[uint64]string // strings already decoded, keyed by their offset
}

// coder reads the next type tag. A read error is discarded, in which case the
// zero byte returned by ReadByte is used (zero is coderNil).
func (r *reader) coder() coder {
	b, _ := r.data.ReadByte()
	return coder(b)
}

// varint reads a signed varint from the index data.
func (r *reader) varint() int64 {
	return r.data.varint()
}

// uvarint reads an unsigned varint from the index data.
func (r *reader) uvarint() uint64 {
	return r.data.uvarint()
}

// sync reads the next type tag and panics unless it equals c.
func (r *reader) sync(c coder) {
	got := r.coder()
	if got == c {
		return
	}
	panic("db/leveldb: bad sync expected " + strconv.Itoa(int(c)) + " but got " + strconv.Itoa(int(got)))
}

// The methods below read one varint from the index data and convert it to
// the named type.

func (r *reader) int() int {
	v := r.data.varint()
	return int(v)
}

func (r *reader) uint() uint {
	v := r.data.uvarint()
	return uint(v)
}

func (r *reader) int32() int32 {
	v := r.data.varint()
	return int32(v)
}

func (r *reader) uint32() uint32 {
	v := r.data.uvarint()
	return uint32(v)
}

func (r *reader) int64() int64 {
	return r.data.varint()
}

func (r *reader) uint64() uint64 {
	return r.data.uvarint()
}
// string reads a string reference from the index data: an offset into the
// string data part, where the string is stored as | length | bytes |.
// Decoded strings are cached by offset, so a repeated reference is resolved
// without touching the string data again. An offset or length that points
// outside the string data makes the slice expression panic.
func (r *reader) string() string {
	offset := r.data.uvarint()
	if cached, hit := r.stringIndex[offset]; hit {
		return cached
	}
	_, _ = r.strings.Seek(int64(offset), io.SeekStart)
	size := int64(r.strings.uvarint())
	// The string bytes start right after the length prefix that was just read.
	start, _ := r.strings.Seek(0, io.SeekCurrent)
	end := start + size
	decoded := r.strings.data[start:end]
	r.stringIndex[offset] = decoded
	return decoded
}
// msg reads a global.MSG: an entry count followed by that many pairs of a
// key (string reference) and a value (coder-tagged object).
//
// It panics if the entry count cannot be satisfied by the remaining index
// data, which indicates corrupt input.
func (r *reader) msg() global.MSG {
	length := r.uvarint()
	// Every entry takes at least two bytes of index data (a string offset and
	// a coder), so a larger count can only come from corrupt data. Reject it
	// before it is used as an allocation size.
	if length > uint64(r.data.Len()) {
		panic("db/leveldb: invalid msg length " + strconv.FormatUint(length, 10))
	}
	msg := make(global.MSG, length)
	for i := uint64(0); i < length; i++ {
		s := r.string()
		msg[s] = r.obj()
	}
	return msg
}
// arrayMsg reads a []global.MSG: an element count followed by that many
// encoded messages.
//
// It panics if the element count cannot be satisfied by the remaining index
// data, which indicates corrupt input.
func (r *reader) arrayMsg() []global.MSG {
	length := r.uvarint()
	// Every element takes at least one byte of index data (its entry count),
	// so a larger count can only come from corrupt data. Reject it before it
	// is used as an allocation size.
	if length > uint64(r.data.Len()) {
		panic("db/leveldb: invalid array length " + strconv.FormatUint(length, 10))
	}
	msgs := make([]global.MSG, length)
	for i := range msgs {
		msgs[i] = r.msg()
	}
	return msgs
}
// obj reads one coder-tagged value and returns it with the Go type that the
// tag names. coderNil yields nil; a tag outside the supported set panics.
func (r *reader) obj() interface{} {
	tag := r.coder()
	switch tag {
	case coderNil:
		return nil
	case coderInt:
		return r.int()
	case coderUint:
		return r.uint()
	case coderInt32:
		return r.int32()
	case coderUint32:
		return r.uint32()
	case coderInt64:
		return r.int64()
	case coderUint64:
		return r.uint64()
	case coderString:
		return r.string()
	case coderMSG:
		return r.msg()
	case coderArrayMSG:
		return r.arrayMsg()
	}
	panic("db/leveldb: invalid coder " + strconv.Itoa(int(tag)))
}
// newReader parses the header of an encoded value and returns a reader over
// its two parts. The layout is
//
//	| version | string data length | index data length | string data | index data |
//
// An error is returned if the version is not dataVersion or if the two
// lengths do not fit into what follows the header.
func newReader(data string) (*reader, error) {
	in := newIntReader(data)
	if v := in.uvarint(); v != dataVersion {
		return nil, errors.Errorf("db/leveldb: invalid data version %d", v)
	}
	sl := in.uvarint()
	dl := in.uvarint()
	// Compare each length with what is left instead of adding them first, so
	// that a huge value in the header cannot overflow the check.
	rest := uint64(in.Len())
	if sl > rest || dl > rest-sl {
		return nil, errors.Errorf("db/leveldb: invalid data length: string part %d, index part %d, available %d", sl, dl, rest)
	}
	// Len reports the unread bytes, so this is everything after the header.
	body := data[len(data)-in.Len():]
	return &reader{
		data:        newIntReader(body[sl : sl+dl]),
		strings:     newIntReader(body[:sl]),
		stringIndex: make(map[uint64]string),
	}, nil
}

149
db/leveldb/writer.go Normal file
View File

@ -0,0 +1,149 @@
package leveldb
import (
"bytes"
"io"
"github.com/Mrs4s/go-cqhttp/global"
)
// intWriter is a bytes.Buffer with helpers that append varint-encoded
// integers.
type intWriter struct {
	bytes.Buffer
}

// varint appends x using zig-zag encoding, so values of small magnitude stay
// short whatever their sign.
func (w *intWriter) varint(x int64) {
	zigzag := uint64(x) << 1
	if x < 0 {
		zigzag = ^zigzag
	}
	w.uvarint(zigzag)
}

// uvarint appends x seven bits at a time, least significant group first,
// with the high bit set on every byte except the last.
func (w *intWriter) uvarint(x uint64) {
	for ; x >= 0x80; x >>= 7 {
		w.WriteByte(byte(x) | 0x80)
	}
	w.WriteByte(byte(x))
}
// writer implements the index write.
//
// Data format (integers are uvarint-encoded):
//
//	| version | string data length | index data length | string data | index data |
//
// In the string data part, each string is encoded as:
//
//	| string length | string |
//
// In the index data part, each value is encoded as:
//
//	| coder | value |
//
//   - coder identifies the value's type.
//   - for a string, the value is its offset in the string data part.
type writer struct {
	data        intWriter         // index data part
	strings     intWriter         // string data part
	stringIndex map[string]uint64 // offset of every string already written
}

// newWriter returns an empty writer with its string index initialised.
func newWriter() *writer {
	w := new(writer)
	w.stringIndex = make(map[string]uint64)
	return w
}
// coder appends the type tag o to the index data.
func (w *writer) coder(o coder) {
	w.data.WriteByte(byte(o))
}

// varint appends a signed varint to the index data.
func (w *writer) varint(x int64) {
	w.data.varint(x)
}

// uvarint appends an unsigned varint to the index data.
func (w *writer) uvarint(x uint64) {
	w.data.uvarint(x)
}

// nil appends the coderNil tag.
func (w *writer) nil() {
	w.data.WriteByte(byte(coderNil))
}

// The methods below widen their argument to 64 bits and append it to the
// index data as a varint (signed types) or uvarint (unsigned types).

func (w *writer) int(i int) {
	w.data.varint(int64(i))
}

func (w *writer) uint(i uint) {
	w.data.uvarint(uint64(i))
}

func (w *writer) int32(i int32) {
	w.data.varint(int64(i))
}

func (w *writer) uint32(i uint32) {
	w.data.uvarint(uint64(i))
}

func (w *writer) int64(i int64) {
	w.data.varint(i)
}

func (w *writer) uint64(i uint64) {
	w.data.uvarint(i)
}
// string writes a reference to s into the index data. The string itself is
// stored once in the string data part as | string length | string |; later
// occurrences of the same string reuse the recorded offset.
func (w *writer) string(s string) {
	if known, seen := w.stringIndex[s]; seen {
		w.uvarint(known)
		return
	}
	// First occurrence: the string starts at the current end of the string
	// data part.
	start := uint64(w.strings.Len())
	w.stringIndex[s] = start
	w.strings.uvarint(uint64(len(s)))
	_, _ = w.strings.WriteString(s)
	w.uvarint(start)
}
// msg writes the number of entries in m followed by every entry as a key
// (string reference) and a value (coder-tagged object). Entries are written
// in map iteration order.
func (w *writer) msg(m global.MSG) {
	count := uint64(len(m))
	w.uvarint(count)
	for key := range m {
		w.string(key)
		w.obj(m[key])
	}
}
// arrayMsg writes the number of messages in a followed by each message in
// slice order.
func (w *writer) arrayMsg(a []global.MSG) {
	count := len(a)
	w.uvarint(uint64(count))
	for i := 0; i < count; i++ {
		w.msg(a[i])
	}
}
// obj writes o as a coder tag followed by its encoded value. A nil interface
// is written as coderNil alone. Only the types listed below can be encoded;
// any other type panics.
func (w *writer) obj(o interface{}) {
	// Cases follow the order of the coder constants.
	switch v := o.(type) {
	case nil:
		w.nil()
	case int:
		w.coder(coderInt)
		w.int(v)
	case uint:
		w.coder(coderUint)
		w.uint(v)
	case int32:
		w.coder(coderInt32)
		w.int32(v)
	case uint32:
		w.coder(coderUint32)
		w.uint32(v)
	case int64:
		w.coder(coderInt64)
		w.int64(v)
	case uint64:
		w.coder(coderUint64)
		w.uint64(v)
	case string:
		w.coder(coderString)
		w.string(v)
	case global.MSG:
		w.coder(coderMSG)
		w.msg(v)
	case []global.MSG:
		w.coder(coderArrayMSG)
		w.arrayMsg(v)
	default:
		panic("unsupported type")
	}
}
// bytes assembles the encoded value:
//
//	| version | string data length | index data length | string data | index data |
//
// The writer is left unchanged, so bytes may be called more than once and
// returns the same encoding each time.
func (w *writer) bytes() []byte {
	var out intWriter
	out.uvarint(dataVersion)
	out.uvarint(uint64(w.strings.Len()))
	out.uvarint(uint64(w.data.Len()))
	// Copy from readers over the buffered bytes rather than from the buffers
	// themselves: reading a bytes.Buffer drains it, which would make a second
	// call emit zero lengths and no payload.
	_, _ = io.Copy(&out, bytes.NewReader(w.strings.Bytes()))
	_, _ = io.Copy(&out, bytes.NewReader(w.data.Bytes()))
	return out.Bytes()
}

View File

@ -75,11 +75,6 @@ type Server struct {
Default string
}
// LevelDBConfig holds the leveldb-related settings.
type LevelDBConfig struct {
Enable bool `yaml:"enable"`
}
// MongoDBConfig mongodb 相关配置
type MongoDBConfig struct {
Enable bool `yaml:"enable"`