1
0
mirror of https://github.com/Mrs4s/go-cqhttp.git synced 2025-05-05 03:23:49 +08:00

internal/btree: remove

Goodbye!
This commit is contained in:
wdvxdr 2022-08-31 21:11:23 +08:00
parent 2f92146092
commit 093605cf01
6 changed files with 0 additions and 964 deletions

View File

@ -1,571 +0,0 @@
// Package btree provide a disk-based btree
package btree
import (
"io"
"math/rand"
"os"
"unsafe"
"github.com/pkg/errors"
)
// Layout parameters of the database file. The struct sizes are taken with
// unsafe.Sizeof because tables and the superblock are written to the file as
// raw images of their in-memory representation.
const (
// hashSize is the length in bytes of every key.
hashSize = 16 // md5 hash
// tableSize is the number of item slots in one table; it is derived so that
// the items array occupies less than 1024 bytes.
tableSize = (1024 - 1) / int(unsafe.Sizeof(item{}))
// cacheSlots is the number of entries in DB.cache; a table lives in slot
// offset % cacheSlots.
cacheSlots = 11 // prime
// superSize is the number of bytes the superblock occupies at the start of the file.
superSize = int(unsafe.Sizeof(super{}))
// tableStructSize is the number of bytes one table occupies in the file.
tableStructSize = int(unsafe.Sizeof(table{}))
)
// fileLock is a held lock on the database's companion ".lock" file. The
// implementation is platform specific.
type fileLock interface {
// release gives the lock up again.
release() error
}
// item is one slot of a table. Its memory layout is also its on-disk layout,
// so fields must not be reordered.
type item struct {
// hash is the key.
hash [hashSize]byte
// offset is the value stored for the key: in the key tree the file offset of
// the length-prefixed data, in the free-chunk tree the offset of a free chunk.
offset int64
// child is the file offset of the sub-table holding the keys that sort
// before this item, or 0 when there is none.
child int64
}
// table is one node of the tree. Its memory layout is also its on-disk
// layout, so fields must not be reordered.
type table struct {
// items[:size] are the keys of this node in ascending order; of items[size]
// only the child field is used (the sub-table right of the last key).
items [tableSize]item
// size is the number of keys currently stored in items.
size int
}
// cache is one slot of the table cache kept in DB.
type cache struct {
table *table // the cached table
offset int64 // file offset the table belongs to; the zero value never matches a lookup
}
// super is the superblock stored at offset 0 of the file. Its memory layout
// is also its on-disk layout, so fields must not be reordered.
type super struct {
top int64 // file offset of the root table of the key tree
freeTop int64 // file offset of the root table of the free-chunk tree
alloc int64 // file offset at which the next chunk appended to the file starts
}
// DB is an open btree database file together with its in-memory state.
type DB struct {
fd *os.File // the database file
top int64 // file offset of the root table of the key tree (0 = empty tree)
freeTop int64 // file offset of the root table of the free-chunk tree (0 = empty tree)
alloc int64 // file offset at which the next chunk appended to the file starts
cache [cacheSlots]cache // table cache, indexed by offset % cacheSlots
flock fileLock // lock held on the companion ".lock" file
inAllocator bool // set while the free-chunk tree is being modified
deleteLarger bool // lets delete fall back to the next larger key when the exact key is missing
fqueue [freeQueueLen]chunk // chunks whose release was postponed because inAllocator was set
fqueueLen int // number of valid entries in fqueue
}
// get returns the table stored at the given file offset.
//
// The cache slot belonging to the offset is consulted first; on a miss the
// table is read from the file. A table read from disk is not entered into the
// cache here, callers hand it back through put or flush.
//
// Any I/O failure, including a failed seek, panics with a wrapped
// "btree I/O error".
func (d *DB) get(offset int64) *table {
	assert(offset != 0)

	// take from cache
	if slot := &d.cache[offset%cacheSlots]; slot.offset == offset {
		return slot.table
	}

	// A failed seek must not be ignored: the read below would otherwise
	// decode whatever happens to sit at the current file position.
	if _, err := d.fd.Seek(offset, io.SeekStart); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
	t := new(table)
	if err := readTable(d.fd, t); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
	return t
}
// put stores t as the cached table for offset, replacing whatever occupied
// that cache slot before. Nothing is written to the file.
func (d *DB) put(t *table, offset int64) {
	assert(offset != 0)

	// overwrite cache
	d.cache[offset%cacheSlots] = cache{table: t, offset: offset}
}
// flush writes t to the file at offset and then caches it.
//
// Any I/O failure, including a failed seek, panics with a wrapped
// "btree I/O error".
func (d *DB) flush(t *table, offset int64) {
	assert(offset != 0)

	// Writing after a failed seek would overwrite an unrelated part of the
	// file, so the seek result has to be checked.
	if _, err := d.fd.Seek(offset, io.SeekStart); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
	if err := writeTable(d.fd, t); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
	d.put(t, offset)
}
// flushSuper writes the current top, freeTop and alloc values to the
// superblock at the start of the file.
//
// Any I/O failure, including a failed seek, panics with a wrapped
// "btree I/O error".
func (d *DB) flushSuper() {
	// Writing after a failed seek would put the superblock somewhere in the
	// middle of the file.
	if _, err := d.fd.Seek(0, io.SeekStart); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
	sb := super{
		top:     d.top,
		freeTop: d.freeTop,
		alloc:   d.alloc,
	}
	if err := writeSuper(d.fd, &sb); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
}
// Open opens an existing btree file.
//
// It first takes the lock on name+".lock", then opens the file and loads the
// superblock. If any step fails, everything acquired so far is released again
// and a nil *DB is returned together with the error.
func Open(name string) (*DB, error) {
	lock, err := newFileLock(name + ".lock")
	if err != nil {
		return nil, errors.New("文件被其他进程占用")
	}

	fd, err := os.OpenFile(name, os.O_RDWR, 0o644)
	if err != nil {
		// best effort: the open error is the one worth reporting
		_ = lock.release()
		return nil, errors.Wrap(err, "btree open file failed")
	}

	var sb super
	if err := readSuper(fd, &sb); err != nil {
		// do not hand out a DB whose meta data could not be read
		_ = fd.Close()
		_ = lock.release()
		return nil, errors.Wrap(err, "btree read meta info failed")
	}

	return &DB{
		fd:      fd,
		top:     sb.top,
		freeTop: sb.freeTop,
		alloc:   sb.alloc,
		flock:   lock,
	}, nil
}
// Create creates a new, empty database file, truncating name if it already
// exists.
//
// It first takes the lock on name+".lock". If the file cannot be opened the
// lock is released again before the error is returned. Writing the initial
// superblock panics on I/O failure (see flushSuper).
func Create(name string) (*DB, error) {
	lock, err := newFileLock(name + ".lock")
	if err != nil {
		return nil, errors.New("文件被其他进程占用")
	}

	fd, err := os.OpenFile(name, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0o644)
	if err != nil {
		// best effort: the open error is the one worth reporting
		_ = lock.release()
		return nil, errors.Wrap(err, "btree open file failed")
	}

	db := &DB{
		fd:    fd,
		flock: lock,
		// the first chunk starts right behind the superblock
		alloc: int64(superSize),
	}
	db.flushSuper()
	return db, nil
}
// Close syncs the file, releases the lock, closes the file and drops the
// table cache.
//
// Every step is attempted even when an earlier one failed, so neither the
// file descriptor nor the lock is left behind. The error returned is, in
// order of preference: the lock release error (unwrapped), the close error
// (wrapped as "btree close failed"), the sync error (wrapped as
// "btree sync failed").
func (d *DB) Close() error {
	syncErr := d.fd.Sync()
	lockErr := d.flock.release()
	closeErr := d.fd.Close()
	d.cache = [cacheSlots]cache{}

	switch {
	case lockErr != nil:
		return lockErr
	case closeErr != nil:
		return errors.Wrap(closeErr, "btree close failed")
	default:
		// errors.Wrap returns nil for a nil error
		return errors.Wrap(syncErr, "btree sync failed")
	}
}
// collapse removes the table at offset from the tree if it no longer holds
// any key.
//
// A table with keys is left alone and its own offset is returned. An empty
// table is released with freeChunk and the offset of its only remaining child
// (possibly 0) is returned, so the caller can link that child in its place.
func collapse(bt *DB, offset int64) int64 {
	t := bt.get(offset)
	bt.put(t, offset)
	if t.size != 0 {
		/* unable to collapse */
		return offset
	}
	onlyChild := t.items[0].child

	/*
	 * WARNING: this is dangerous as the chunk is added to allocation tree
	 * before the references to it are removed!
	 */
	// The chunk being released is a whole table, so its size is the size of
	// the table struct, not of a pointer to it.
	bt.freeChunk(offset, tableStructSize)
	return onlyChild
}
// split cuts t in two around its middle item.
//
// The middle item is handed back to the caller: its key is copied to 'hash'
// and its offset stored in '*offset'. The items above it move into a newly
// allocated table, which is written out and synced; t keeps the items below
// it and is only modified in memory. Returns the file offset of the new table.
func (d *DB) split(t *table, hash *byte, offset *int64) int64 {
	const mid = tableSize / 2

	pivot := &t.items[mid]
	copyhash(hash, &pivot.hash[0])
	*offset = pivot.offset

	upper := &table{size: t.size - mid - 1}
	t.size = mid
	// one slot more than upper.size so the trailing child pointer comes along
	copy(upper.items[:upper.size+1], t.items[mid+1:])

	upperOff := d.allocChunk(tableStructSize)
	d.flush(upper, upperOff)

	// make sure data is written before a reference is added to it
	_ = d.fd.Sync()
	return upperOff
}
// takeSmallest finds and removes the smallest item in the sub-tree rooted at
// toff. The key of that item is stored to 'sha1'. Returns the offset stored
// in the removed item.
func (d *DB) takeSmallest(toff int64, sha1 *byte) int64 {
	t := d.get(toff)
	assert(t.size > 0)

	var taken int64
	if first := t.items[0].child; first != 0 {
		/* recursion */
		taken = d.takeSmallest(first, sha1)
		t.items[0].child = collapse(d, first)
	} else {
		// no smaller sub-table: the first item of this table is the smallest
		taken = d.remove(t, 0, sha1)
	}

	d.flush(t, toff)
	// make sure data is written before a reference is added to it
	_ = d.fd.Sync()
	return taken
}
// takeLargest finds and removes the largest item in the sub-tree rooted at
// toff. The key of that item is stored to 'hash'. Returns the offset stored
// in the removed item.
func (d *DB) takeLargest(toff int64, hash *byte) int64 {
	t := d.get(toff)
	assert(t.size > 0)

	var taken int64
	if last := t.items[t.size].child; last != 0 {
		/* recursion */
		taken = d.takeLargest(last, hash)
		t.items[t.size].child = collapse(d, last)
	} else {
		// no larger sub-table: the last item of this table is the largest
		taken = d.remove(t, t.size-1, hash)
	}

	d.flush(t, toff)
	// make sure data is written before a reference is added to it
	_ = d.fd.Sync()
	return taken
}
// remove takes the item in position 'i' out of the given table. If 'hash' is
// not nil the key of the removed item is stored to it. Returns the offset
// that was stored in the removed item.
//
// When the item has a sub-table on both sides, its slot is kept and refilled
// with a neighbouring item taken from one of the two sub-trees (picked at
// random). Otherwise the following items are shifted down by one and the
// surviving sub-table, if any, is attached to the slot that moved into
// position 'i'.
//
// The table is only changed in memory; writing it out is left to the caller.
func (d *DB) remove(t *table, i int, hash *byte) int64 {
	assert(i < t.size)

	if hash != nil {
		copyhash(hash, &t.items[i].hash[0])
	}

	offset := t.items[i].offset
	lc := t.items[i].child
	rc := t.items[i+1].child

	if lc != 0 && rc != 0 {
		/* replace the removed item by taking an item from one of the
		   child tables */
		var noff int64
		if rand.Int()&1 != 0 {
			noff = d.takeLargest(lc, &t.items[i].hash[0])
			t.items[i].child = collapse(d, lc)
		} else {
			noff = d.takeSmallest(rc, &t.items[i].hash[0])
			t.items[i+1].child = collapse(d, rc)
		}
		// The slot now carries the key of the item that was taken, so it has
		// to carry that item's offset as well. The child links were already
		// brought up to date above and must not be touched here.
		t.items[i].offset = noff
		return offset
	}

	// memmove(&table->items[i], &table->items[i + 1],
	//         (table->size - i) * sizeof(struct btree_item));
	copy(t.items[i:], t.items[i+1:])
	t.size--

	if lc != 0 {
		t.items[i].child = lc
	} else {
		t.items[i].child = rc
	}
	return offset
}
// insert adds the key 'hash' to the sub-tree rooted at toff and returns the
// offset stored for it.
//
// If the key is already present nothing is changed and the existing offset is
// returned. Otherwise the value is stored through insertData once a table
// without a sub-table at the insertion point is reached. A child table that
// became full through the insertion is split, and the pivot of the split is
// inserted into this table; in that case 'hash' is overwritten with the
// pivot key.
func (d *DB) insert(toff int64, hash *byte, data []byte, size int) int64 {
	node := d.get(toff)
	assert(node.size < tableSize-1)

	// binary search for the key, or for the position it belongs at
	lo, hi := 0, node.size
	for lo < hi {
		mid := lo + (hi-lo)>>1
		order := cmp(hash, &node.items[mid].hash[0])
		if order == 0 {
			// already in the table
			existing := node.items[mid].offset
			d.put(node, toff)
			return existing
		}
		if order < 0 {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	pos := lo

	var newOff, right, ret int64
	left := node.items[pos].child
	if left == 0 {
		newOff = d.insertData(data, size)
		ret = newOff
	} else {
		/* recursion */
		ret = d.insert(left, hash, data, size)

		/* check if we need to split */
		child := d.get(left)
		if child.size < tableSize-1 {
			/* nothing to do */
			d.put(node, toff)
			d.put(child, left)
			return ret
		}
		/* overwrites SHA-1 */
		right = d.split(child, hash, &newOff)
		/* flush just in case changes happened */
		d.flush(child, left)
		// make sure data is written before a reference is added to it
		_ = d.fd.Sync()
	}

	node.size++
	// memmove(&table->items[i + 1], &table->items[i],
	//         (table->size - i) * sizeof(struct btree_item));
	copy(node.items[pos+1:], node.items[pos:])

	slot := &node.items[pos]
	copyhash(&slot.hash[0], hash)
	slot.offset = newOff
	slot.child = left
	node.items[pos+1].child = right

	d.flush(node, toff)
	return ret
}
// insertData stores a value and returns the offset to record for it in the
// tree.
//
// With data == nil nothing is written and 'size' itself is returned; the
// free-chunk tree uses this to store a chunk offset directly as the value.
// Otherwise a chunk is allocated and filled with a 32-bit length followed by
// the bytes of data, and the chunk's file offset is returned.
//
// Any I/O failure, including a failed seek, panics with a wrapped
// "btree I/O error".
func (d *DB) insertData(data []byte, size int) int64 {
	if data == nil {
		return int64(size)
	}
	assert(len(data) == size)

	offset := d.allocChunk(4 + len(data))

	// Writing after a failed seek would overwrite an unrelated part of the
	// file, so the seek result has to be checked.
	if _, err := d.fd.Seek(offset, io.SeekStart); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
	if err := write32(d.fd, int32(len(data))); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}
	if _, err := d.fd.Write(data); err != nil {
		panic(errors.Wrap(err, "btree I/O error"))
	}

	// make sure data is written before a reference is added to it
	_ = d.fd.Sync()
	return offset
}
// delete removes the item with key 'hash' from the sub-tree rooted at offset
// and returns the offset that was stored in it, or 0 if nothing was removed.
//
// While d.deleteLarger is set and the exact key is missing, the next larger
// item of the table where the search ended is removed instead. 'hash' is
// overwritten with the key of the item that was actually removed.
func (d *DB) delete(offset int64, hash *byte) int64 {
	if offset == 0 {
		return 0
	}
	node := d.get(offset)

	lo, hi := 0, node.size
	for lo < hi {
		mid := lo + (hi-lo)>>1
		order := cmp(hash, &node.items[mid].hash[0])
		if order == 0 {
			// found
			removed := d.remove(node, mid, hash)
			d.flush(node, offset)
			return removed
		}
		if order < 0 {
			hi = mid
		} else {
			lo = mid + 1
		}
	}

	// not found - recursion
	pos := lo
	child := node.items[pos].child
	removed := d.delete(child, hash)
	switch {
	case removed != 0:
		// the child may have run empty through the removal
		node.items[pos].child = collapse(d, child)
	case d.deleteLarger && pos < node.size:
		/* remove the next largest */
		removed = d.remove(node, pos, hash)
	}

	if removed == 0 {
		d.put(node, offset)
		return 0
	}
	/* flush just in case changes happened */
	d.flush(node, offset)
	return removed
}
// insertTopLevel inserts the key 'hash' into the tree whose root offset is
// stored in '*toff' and returns the offset stored for the key.
//
// If the tree is empty, or its root became full through the insertion, a new
// root table is allocated and '*toff' is updated to point at it. 'hash' may
// be overwritten with a pivot key in the process.
func (d *DB) insertTopLevel(toff *int64, hash *byte, data []byte, size int) int64 { // nolint:unparam
	var pivotOff, ret, right int64
	if *toff == 0 {
		pivotOff = d.insertData(data, size)
		ret = pivotOff
	} else {
		ret = d.insert(*toff, hash, data, size)

		/* check if we need to split */
		root := d.get(*toff)
		if root.size < tableSize-1 {
			/* nothing to do */
			d.put(root, *toff)
			return ret
		}
		right = d.split(root, hash, &pivotOff)
		d.flush(root, *toff)
	}

	/* create new top level table */
	newRoot := &table{size: 1}
	copyhash(&newRoot.items[0].hash[0], hash)
	newRoot.items[0].offset = pivotOff
	newRoot.items[0].child = *toff
	newRoot.items[1].child = right

	newRootOff := d.allocChunk(tableStructSize)
	d.flush(newRoot, newRootOff)
	*toff = newRootOff

	// make sure data is written before a reference is added to it
	_ = d.fd.Sync()
	return ret
}
// lookup searches the sub-tree rooted at toff for the key 'hash' and returns
// the offset stored for it, or 0 if the key is not present.
func (d *DB) lookup(toff int64, hash *byte) int64 {
	// walk down one table per round until the key is found or the path ends
	for toff != 0 {
		node := d.get(toff)

		lo, hi := 0, node.size
		for lo < hi {
			mid := lo + (hi-lo)>>1
			order := cmp(hash, &node.items[mid].hash[0])
			if order == 0 {
				// found
				found := node.items[mid].offset
				d.put(node, toff)
				return found
			}
			if order < 0 {
				hi = mid
			} else {
				lo = mid + 1
			}
		}

		next := node.items[lo].child
		d.put(node, toff)
		toff = next
	}
	return 0
}
// Insert stores 'data' under the key 'chash' in the database file. If the
// key is already present the existing entry is kept.
func (d *DB) Insert(chash *byte, data []byte) {
	// The insertion may overwrite the key it is given, so it works on a
	// private copy and leaves the caller's key alone.
	var key [hashSize]byte
	copyhash(&key[0], chash)

	d.insertTopLevel(&d.top, &key[0], data, len(data))
	freeQueued(d)
	d.flushSuper()
}
// readValue reads the length-prefixed value stored at file offset off.
//
// It returns nil if the value cannot be read completely: when seeking fails,
// when the length prefix is missing or negative, or when fewer bytes than
// announced are available.
func (d *DB) readValue(off int64) []byte {
	if _, err := d.fd.Seek(off, io.SeekStart); err != nil {
		return nil
	}
	length, err := read32(d.fd)
	if err != nil || length < 0 {
		// a negative length can only come from a damaged file; allocating a
		// buffer for it would panic
		return nil
	}
	data := make([]byte, length)
	if _, err := io.ReadFull(d.fd, data); err != nil {
		return nil
	}
	return data
}
// Get looks up the item with the given key 'hash' in the database file and
// returns its contents. It returns nil if the key is not present or the
// value cannot be read.
func (d *DB) Get(hash *byte) []byte {
	if off := d.lookup(d.top, hash); off != 0 {
		return d.readValue(off)
	}
	return nil
}
// Delete removes the item with the given key 'hash' from the database file
// and releases the chunk that held its value. Deleting a key that is not
// present is not an error.
//
// An error is returned if the length of the stored value cannot be read back;
// the key has already been removed from the tree at that point, only the
// value's chunk is not released.
func (d *DB) Delete(hash *byte) error {
	// the removal may overwrite the key it is given, so work on a copy
	var key [hashSize]byte
	copyhash(&key[0], hash)

	off := d.delete(d.top, &key[0])
	if off == 0 {
		return nil // not found key
	}

	d.top = collapse(d, d.top)
	freeQueued(d)
	d.flushSuper()

	// read the length prefix to learn how large the value's chunk is
	if _, err := d.fd.Seek(off, io.SeekStart); err != nil {
		return errors.Wrap(err, "btree I/O error")
	}
	length, err := read32(d.fd)
	if err != nil {
		return errors.Wrap(err, "btree I/O error")
	}
	if length < 0 {
		return errors.New("btree corrupt value length")
	}

	// the addition is done in int so it cannot wrap around in int32
	d.freeChunk(off, int(length)+4)
	freeQueued(d)
	d.flushSuper()
	return nil
}
// Foreach calls iter once for every item in the database file, passing the
// item's key and value.
func (d *DB) Foreach(iter func(key [16]byte, value []byte)) {
	if d.top == 0 {
		// empty tree
		return
	}
	d.iterate(d.get(d.top), iter)
}
// iterate calls iter for every item of the given table and of all tables
// below it. For each slot the item itself is reported first, then the
// sub-table hanging off that slot is visited; the sub-table behind the last
// item comes last.
func (d *DB) iterate(table *table, iter func(key [16]byte, value []byte)) {
	descend := func(off int64) {
		if off != 0 {
			d.iterate(d.get(off), iter)
		}
	}

	for i := 0; i < table.size; i++ {
		entry := table.items[i]
		iter(entry.hash, d.readValue(entry.offset))
		descend(entry.child)
	}
	descend(table.items[table.size].child)
}

View File

@ -1,103 +0,0 @@
package btree
import (
"crypto/sha1"
"os"
"testing"
"github.com/Mrs4s/MiraiGo/utils"
assert2 "github.com/stretchr/testify/assert"
)
// tempfile creates an empty temporary file in the current directory and
// returns its name. The test is aborted if the file cannot be created.
func tempfile(t *testing.T) string {
	temp, err := os.CreateTemp(".", "temp.*.db")
	// temp is nil when creation failed, so the error has to be checked before
	// the file is touched
	if !assert2.NoError(t, err) {
		t.FailNow()
	}
	assert2.NoError(t, temp.Close())
	return temp.Name()
}
// removedb deletes a database file and its companion lock file. Failures are
// ignored: this is cleanup after a test.
func removedb(name string) {
	for _, path := range [...]string{name, name + ".lock"} {
		os.Remove(path)
	}
}
// TestCreate checks that a fresh database can be created and closed again.
func TestCreate(t *testing.T) {
	f := tempfile(t)
	// register the cleanup before anything can fail
	defer removedb(f)

	bt, err := Create(f)
	if assert2.NoError(t, err) {
		// close the database so neither the file nor the lock stays open
		// while the files are being removed
		assert2.NoError(t, bt.Close())
	}
}
// TestBtree runs a handful of values through insert, reopen, iterate, get
// and delete.
func TestBtree(t *testing.T) {
	path := tempfile(t)
	defer removedb(path)

	db, err := Create(path)
	check := assert2.New(t)
	check.NoError(err)

	values := []string{
		"hello world",
		"123",
		"We are met on a great battle-field of that war.",
		"Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania",
		// "00", // TODO: fix this
	}

	// the key of a value is the start of its SHA-1 digest
	keys := make([]*byte, 0, len(values))
	for _, v := range values {
		digest := sha1.Sum([]byte(v))
		key := &digest[0]
		keys = append(keys, key)
		db.Insert(key, []byte(v))
	}
	check.NoError(db.Close())

	// everything must still be there after reopening the file
	db, err = Open(path)
	check.NoError(err)

	var seen []string
	db.Foreach(func(key [16]byte, value []byte) {
		seen = append(seen, string(value))
	})
	check.ElementsMatch(values, seen)

	for i, v := range values {
		check.Equal([]byte(v), db.Get(keys[i]))
	}

	for _, key := range keys {
		check.NoError(db.Delete(key))
	}

	for _, key := range keys {
		check.Equal([]byte(nil), db.Get(key))
	}

	check.NoError(db.Close())
}
// testForeach inserts elemSize random strings into a fresh database and
// checks that Foreach reports exactly those strings.
func testForeach(t *testing.T, elemSize int) {
	want := make([]string, elemSize)
	for i := range want {
		want[i] = utils.RandomString(20)
	}

	path := tempfile(t)
	defer removedb(path)
	db, err := Create(path)
	assert2.NoError(t, err)

	for _, v := range want {
		digest := sha1.Sum([]byte(v))
		db.Insert(&digest[0], []byte(v))
	}

	var have []string
	db.Foreach(func(key [16]byte, value []byte) {
		have = append(have, string(value))
	})
	assert2.ElementsMatch(t, want, have)
	assert2.NoError(t, db.Close())
}
// TestDB_Foreach runs testForeach for an empty database and for a few sizes
// up to 200 elements.
func TestDB_Foreach(t *testing.T) {
	for _, n := range [...]int{0, 5, 100, 200} {
		testForeach(t, n)
	}
}

View File

@ -1,116 +0,0 @@
package btree
import (
"encoding/binary"
"math/rand"
"unsafe"
)
// chunk describes a region of the database file.
type chunk struct {
offset int64 // file offset at which the region starts
len int // length of the region in bytes
}
// freeQueueLen is the number of chunks DB.fqueue can hold.
const freeQueueLen = 64
// freeQueued releases every chunk that freeChunk had to postpone and then
// empties the queue.
func freeQueued(bt *DB) {
	i := 0
	for i < bt.fqueueLen {
		pending := bt.fqueue[i]
		bt.freeChunk(pending.offset, pending.len)
		i++
	}
	bt.fqueueLen = 0
}
// allocChunk reserves a chunk of at least 'size' bytes in the database file
// and returns its file offset. Sizes are rounded up to a power of two.
//
// Unless the call comes from inside the allocator itself, the free-chunk
// tree is searched first for a chunk of the same or a larger size. A larger
// chunk is cut down by repeatedly giving its upper half back with freeChunk.
// When no free chunk is found, or while the free-chunk tree is already being
// modified, the chunk is taken from the end of the file, aligned to its own
// size.
//
// The superblock is written and the file synced before returning.
func (d *DB) allocChunk(size int) int64 {
	assert(size > 0)

	size = power2(size)

	var offset int64
	// The free-chunk tree must not be searched while it is being modified;
	// such calls fall through to the end-of-file allocation below.
	if !d.inAllocator {
		const i32s = int(unsafe.Sizeof(int32(0)))

		/* create fake size SHA-1 */
		var sha1 [hashSize]byte
		binary.LittleEndian.PutUint32(sha1[0*i32s:1*i32s], ^uint32(0))   // *(uint32_t *) hash = -1;
		binary.LittleEndian.PutUint32(sha1[1*i32s:2*i32s], uint32(size)) // ((__be32 *) hash)[1] = to_be32(size);

		/* find free chunk with the larger or the same size/SHA-1 */
		d.inAllocator = true
		d.deleteLarger = true
		offset = d.delete(d.freeTop, &sha1[0])
		d.deleteLarger = false
		if offset != 0 {
			// delete has overwritten sha1 with the key of the record it
			// removed; the chunk length is the second 32-bit word of that
			// key (the first one is the 0xFFFFFFFF marker).
			flen := int(binary.LittleEndian.Uint32(sha1[1*i32s : 2*i32s]))
			assert(power2(flen) == flen)
			assert(flen >= size)

			// freeChunk only writes size-keyed records, so there is no
			// per-offset buddy record to delete here.
			d.freeTop = collapse(d, d.freeTop)

			d.inAllocator = false

			/* free extra space at the end of the chunk */
			for flen > size {
				flen >>= 1
				d.freeChunk(offset+int64(flen), flen)
			}
		} else {
			d.inAllocator = false
		}
	}
	if offset == 0 {
		/* not found, allocate from the end of the file */
		offset = d.alloc
		/* TODO: this wastes memory.. */
		if offset&int64(size-1) != 0 {
			offset += int64(size) - (offset & (int64(size) - 1))
		}
		d.alloc = offset + int64(size)
	}
	d.flushSuper()

	// make sure the allocation tree is up-to-date before using the chunk
	_ = d.fd.Sync()
	return offset
}
// freeChunk marks the chunk at 'offset' as unused in the database file.
// 'size' is rounded up to a power of two, and the offset must be aligned to
// that rounded size.
//
// While the free-chunk tree is being modified (d.inAllocator) the request is
// only appended to d.fqueue; freeQueued carries it out later. Otherwise a
// record is inserted into the free-chunk tree whose key is
// {0xFFFFFFFF, size, random, random} and whose value is the chunk offset,
// and the superblock is written and synced.
func (d *DB) freeChunk(offset int64, size int) {
	assert(size > 0)
	assert(offset != 0)
	size = power2(size)
	assert(offset&int64(size-1) == 0)

	if d.inAllocator {
		// the tree is busy, remember the chunk for later
		d.fqueue[d.fqueueLen] = chunk{offset: offset, len: size}
		d.fqueueLen++
		return
	}

	var key [hashSize]byte
	d.inAllocator = true

	/* add allocation tree information */
	binary.LittleEndian.PutUint32(key[0:4], ^uint32(0))   // *(uint32_t *) hash = -1;
	binary.LittleEndian.PutUint32(key[4:8], uint32(size)) // ((__be32 *) hash)[1] = to_be32(size);
	/* to make SHA-1 unique */
	binary.LittleEndian.PutUint32(key[8:12], rand.Uint32())
	binary.LittleEndian.PutUint32(key[12:16], rand.Uint32())

	// insert_toplevel(btree, &btree->free_top, hash, NULL, offset);
	d.insertTopLevel(&d.freeTop, &key[0], nil, int(offset))
	d.inAllocator = false

	d.flushSuper()

	// make sure the allocation tree is up-to-date before removing
	// references to the chunk
	_ = d.fd.Sync()
}

View File

@ -1,45 +0,0 @@
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
package btree
import (
"os"
"syscall"
)
// unixFileLock is the fileLock implementation for the unix-like systems
// named in this file's build constraint.
type unixFileLock struct {
f *os.File // the open lock file the lock is held on
}
// release unlocks the lock file and closes it.
//
// The file is closed even when unlocking fails, so no descriptor is left
// behind. The unlock error takes precedence over the close error.
func (fl *unixFileLock) release() error {
	unlockErr := setFileLock(fl.f, false)
	closeErr := fl.f.Close()
	if unlockErr != nil {
		return unlockErr
	}
	return closeErr
}
// newFileLock opens the lock file at path, creating it if it does not exist
// yet, and takes an exclusive lock on it without blocking. If the lock
// cannot be taken the file is closed again and the error returned.
func newFileLock(path string) (fl fileLock, err error) {
	file, err := os.OpenFile(path, os.O_RDWR, 0)
	if os.IsNotExist(err) {
		file, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644)
	}
	if err != nil {
		return nil, err
	}

	if err = setFileLock(file, true); err != nil {
		file.Close()
		return nil, err
	}
	return &unixFileLock{f: file}, nil
}
// setFileLock takes (lock == true) or drops (lock == false) an exclusive
// flock on f. The call never blocks; it returns an error instead.
func setFileLock(f *os.File, lock bool) error {
	op := syscall.LOCK_UN | syscall.LOCK_NB
	if lock {
		op = syscall.LOCK_EX | syscall.LOCK_NB
	}
	return syscall.Flock(int(f.Fd()), op)
}

View File

@ -1,28 +0,0 @@
package btree
import "syscall"
// windowsFileLock is the fileLock implementation for Windows.
type windowsFileLock struct {
fd syscall.Handle // handle of the open lock file
}
// release closes the handle of the lock file.
func (fl *windowsFileLock) release() error {
	if err := syscall.Close(fl.fd); err != nil {
		return err
	}
	return nil
}
// newFileLock opens the lock file at path for reading and writing with a
// share mode of 0, creating the file if it does not exist yet, and returns a
// lock wrapping the handle.
func newFileLock(path string) (fileLock, error) {
	name, err := syscall.UTF16PtrFromString(path)
	if err != nil {
		return nil, err
	}

	const (
		access uint32 = syscall.GENERIC_READ | syscall.GENERIC_WRITE
		share  uint32 = 0
	)
	handle, err := syscall.CreateFile(name, access, share, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0)
	if err == syscall.ERROR_FILE_NOT_FOUND {
		// first use: the lock file has to be created
		handle, err = syscall.CreateFile(name, access, share, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0)
	}
	if err != nil {
		return nil, err
	}

	lock := &windowsFileLock{fd: handle}
	return lock, nil
}

View File

@ -1,101 +0,0 @@
package btree
import (
"io"
"reflect"
"unsafe"
)
// assert panics with "assert failed!" unless cond holds.
func assert(cond bool) {
	if cond {
		return
	}
	panic("assert failed!")
}
// power2 returns the smallest power of two that is greater than or equal to
// 'val'. For values below 1 the result is 1.
func power2(val int) int {
	for p := 1; ; p <<= 1 {
		if p >= val {
			return p
		}
	}
}
// helpers for hash

// cmp compares the two 16-byte keys that a and b point to. Each key is read
// as two unsigned 64-bit words in the machine's byte order; the first words
// decide, the second words break ties. The result is -1, 0 or 1.
//
// The words are compared with < and > rather than by subtracting them:
// a difference of unsigned values wraps around for operands 2^63 or more
// apart, which flips the sign and makes the order non-transitive.
//
// NOTE(review): trees written with the subtraction-based ordering may contain
// keys that sort differently under this comparison — confirm whether existing
// files need rebuilding.
func cmp(a, b *byte) int64 {
	pa, pb := unsafe.Pointer(a), unsafe.Pointer(b)
	for word := 0; word < 2; word++ {
		x := *(*uint64)(unsafe.Add(pa, 8*word))
		y := *(*uint64)(unsafe.Add(pb, 8*word))
		switch {
		case x < y:
			return -1
		case x > y:
			return 1
		}
	}
	return 0
}
// copyhash copies the hashSize-byte key that src points to into the memory
// dst points to.
func copyhash(dst *byte, src *byte) {
	to := (*[hashSize]byte)(unsafe.Pointer(dst))
	from := (*[hashSize]byte)(unsafe.Pointer(src))
	*to = *from
}
// reading table

// read32 reads a 32-bit integer in the machine's byte order from r.
//
// Exactly four bytes are consumed. If r ends early an error is returned
// (io.EOF when nothing could be read, io.ErrUnexpectedEOF after a partial
// read) instead of a value built from an incomplete buffer.
func read32(r io.Reader) (int32, error) {
	b := make([]byte, 4)
	// a single Read may legally deliver fewer bytes than asked for
	if _, err := io.ReadFull(r, b); err != nil {
		return 0, err
	}
	return *(*int32)(unsafe.Pointer(&b[0])), nil
}
// readTable fills t from the raw table image read from r.
//
// Exactly tableStructSize bytes are consumed. If r ends early an error is
// returned and t is left untouched.
func readTable(r io.Reader, t *table) error {
	buf := make([]byte, tableStructSize)
	// a single Read may legally deliver fewer bytes than asked for, which
	// would leave the tail of the table zeroed
	if _, err := io.ReadFull(r, buf); err != nil {
		return err
	}
	*t = *(*table)(unsafe.Pointer(&buf[0]))
	return nil
}
// readSuper fills s from the raw superblock image read from r.
//
// Exactly superSize bytes are consumed. If r ends early an error is returned
// and s is left untouched.
func readSuper(r io.Reader, s *super) error {
	buf := make([]byte, superSize)
	// a single Read may legally deliver fewer bytes than asked for, which
	// would leave part of the superblock zeroed
	if _, err := io.ReadFull(r, buf); err != nil {
		return err
	}
	*s = *(*super)(unsafe.Pointer(&buf[0]))
	return nil
}
// write table

// write32 writes the four bytes of t to w exactly as they lie in memory.
func write32(w io.Writer, t int32) error {
	// view the integer as a byte slice without copying it
	var raw []byte
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&raw))
	hdr.Data = uintptr(unsafe.Pointer(&t))
	hdr.Len, hdr.Cap = 4, 4

	if _, err := w.Write(raw); err != nil {
		return err
	}
	return nil
}
// writeTable writes the tableStructSize bytes of t to w exactly as they lie
// in memory.
func writeTable(w io.Writer, t *table) error {
	// view the table as a byte slice without copying it
	var raw []byte
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&raw))
	hdr.Data = uintptr(unsafe.Pointer(t))
	hdr.Len, hdr.Cap = tableStructSize, tableStructSize

	if _, err := w.Write(raw); err != nil {
		return err
	}
	return nil
}
// writeSuper writes the superSize bytes of s to w exactly as they lie in
// memory.
func writeSuper(w io.Writer, s *super) error {
	// view the superblock as a byte slice without copying it
	var raw []byte
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&raw))
	hdr.Data = uintptr(unsafe.Pointer(s))
	hdr.Len, hdr.Cap = superSize, superSize

	if _, err := w.Write(raw); err != nil {
		return err
	}
	return nil
}