diff --git a/internal/btree/btree.go b/internal/btree/btree.go index f8dd4f2..2f7dc18 100644 --- a/internal/btree/btree.go +++ b/internal/btree/btree.go @@ -11,7 +11,7 @@ import ( ) const ( - sha1Size = 20 // md5 sha1 + hashSize = 16 // md5 hash tableSize = (1024 - 1) / int(unsafe.Sizeof(item{})) cacheSlots = 11 // prime superSize = int(unsafe.Sizeof(super{})) @@ -19,7 +19,7 @@ const ( ) type item struct { - sha1 [sha1Size]byte + hash [hashSize]byte offset int64 child int64 } @@ -46,10 +46,12 @@ type DB struct { top int64 freeTop int64 alloc int64 - cache [23]cache + cache [cacheSlots]cache inAllocator bool deleteLarger bool + fqueue [freeQueueLen]chunk + fqueueLen int } func (d *DB) get(offset int64) *table { @@ -163,10 +165,10 @@ func collapse(bt *DB, offset int64) int64 { return ret } -// split a table. The pivot item is stored to 'sha1' and 'offset'. +// split a table. The pivot item is stored to 'hash' and 'offset'. // Returns offset to the new table. func (d *DB) split(t *table, hash *byte, offset *int64) int64 { - copysha1(hash, &t.items[tableSize/2].sha1[0]) + copyhash(hash, &t.items[tableSize/2].hash[0]) *offset = t.items[tableSize/2].offset ntable := new(table) @@ -185,7 +187,7 @@ func (d *DB) split(t *table, hash *byte, offset *int64) int64 { } // takeSmallest find and remove the smallest item from the given table. The key of the item -// is stored to 'sha1'. Returns offset to the item +// is stored to 'hash'. Returns offset to the item func (d *DB) takeSmallest(toff int64, sha1 *byte) int64 { table := d.get(toff) assert(table.size > 0) @@ -207,18 +209,18 @@ func (d *DB) takeSmallest(toff int64, sha1 *byte) int64 { } // takeLargest find and remove the largest item from the given table. The key of the item -// is stored to 'sha1'. Returns offset to the item -func (d *DB) takeLargest(toff int64, sha1 *byte) int64 { +// is stored to 'hash'. Returns offset to the item +func (d *DB) takeLargest(toff int64, hash *byte) int64 { table := d.get(toff) assert(table.size > 0) var off int64 child := table.items[table.size].child if child == 0 { - off = d.remove(table, table.size-1, sha1) + off = d.remove(table, table.size-1, hash) } else { /* recursion */ - off = d.takeLargest(child, sha1) + off = d.takeLargest(child, hash) table.items[table.size].child = collapse(d, child) } d.flush(table, toff) @@ -229,12 +231,12 @@ func (d *DB) takeLargest(toff int64, sha1 *byte) int64 { } // remove an item in position 'i' from the given table. The key of the -// removed item is stored to 'sha1'. Returns offset to the item. -func (d *DB) remove(t *table, i int, sha1 *byte) int64 { +// removed item is stored to 'hash'. Returns offset to the item. +func (d *DB) remove(t *table, i int, hash *byte) int64 { assert(i < t.size) - if sha1 != nil { - copysha1(sha1, &t.items[i].sha1[0]) + if hash != nil { + copyhash(hash, &t.items[i].hash[0]) } offset := t.items[i].offset @@ -246,10 +248,10 @@ func (d *DB) remove(t *table, i int, sha1 *byte) int64 { child tables */ var noff int64 if rand.Int()&1 != 0 { - noff = d.takeLargest(lc, &t.items[i].sha1[0]) + noff = d.takeLargest(lc, &t.items[i].hash[0]) t.items[i].child = collapse(d, lc) } else { - noff = d.takeSmallest(rc, &t.items[i].sha1[0]) + noff = d.takeSmallest(rc, &t.items[i].hash[0]) t.items[i+1].child = collapse(d, rc) } t.items[i].child = noff @@ -268,14 +270,14 @@ func (d *DB) remove(t *table, i int, sha1 *byte) int64 { return offset } -func (d *DB) insert(toff int64, sha1 *byte, data []byte, size int) int64 { +func (d *DB) insert(toff int64, hash *byte, data []byte, size int) int64 { table := d.get(toff) assert(table.size < tableSize-1) left, right := 0, table.size for left < right { mid := (right-left)>>1 + left - switch cmp := cmp(sha1, &table.items[mid].sha1[0]); { + switch cmp := cmp(hash, &table.items[mid].hash[0]); { case cmp == 0: // already in the table ret := table.items[mid].offset @@ -293,7 +295,7 @@ func (d *DB) insert(toff int64, sha1 *byte, data []byte, size int) int64 { lc := table.items[i].child if lc != 0 { /* recursion */ - ret = d.insert(lc, sha1, data, size) + ret = d.insert(lc, hash, data, size) /* check if we need to split */ child := d.get(lc) @@ -304,7 +306,7 @@ func (d *DB) insert(toff int64, sha1 *byte, data []byte, size int) int64 { return ret } /* overwrites SHA-1 */ - rc = d.split(child, sha1, &off) + rc = d.split(child, hash, &off) /* flush just in case changes happened */ d.flush(child, lc) @@ -319,7 +321,7 @@ func (d *DB) insert(toff int64, sha1 *byte, data []byte, size int) int64 { // memmove(&table->items[i + 1], &table->items[i], // (table->size - i) * sizeof(struct btree_item)); copy(table.items[i+1:], table.items[i:]) - copysha1(&table.items[i].sha1[0], sha1) + copyhash(&table.items[i].hash[0], hash) table.items[i].offset = off table.items[i].child = lc table.items[i+1].child = rc @@ -351,9 +353,9 @@ func (d *DB) insertData(data []byte, size int) int64 { return offset } -// delete remove an item with key 'sha1' from the given table. The offset to the +// delete remove an item with key 'hash' from the given table. The offset to the // removed item is returned. -// Please note that 'sha1' is overwritten when called inside the allocator. +// Please note that 'hash' is overwritten when called inside the allocator. func (d *DB) delete(offset int64, hash *byte) int64 { if offset == 0 { return 0 @@ -363,7 +365,7 @@ func (d *DB) delete(offset int64, hash *byte) int64 { left, right := 0, table.size for left < right { i := (right-left)>>1 + left - switch cmp := cmp(hash, &table.items[i].sha1[0]); { + switch cmp := cmp(hash, &table.items[i].hash[0]); { case cmp == 0: // found ret := d.remove(table, i, hash) @@ -396,10 +398,10 @@ func (d *DB) delete(offset int64, hash *byte) int64 { return ret } -func (d *DB) insertTopLevel(toff *int64, sha1 *byte, data []byte, size int) int64 { // nolint:unparam +func (d *DB) insertTopLevel(toff *int64, hash *byte, data []byte, size int) int64 { // nolint:unparam var off, ret, rc int64 if *toff != 0 { - ret = d.insert(*toff, sha1, data, size) + ret = d.insert(*toff, hash, data, size) /* check if we need to split */ table := d.get(*toff) @@ -408,7 +410,7 @@ func (d *DB) insertTopLevel(toff *int64, sha1 *byte, data []byte, size int) int6 d.put(table, *toff) return ret } - rc = d.split(table, sha1, &off) + rc = d.split(table, hash, &off) d.flush(table, *toff) } else { off = d.insertData(data, size) @@ -418,7 +420,7 @@ func (d *DB) insertTopLevel(toff *int64, sha1 *byte, data []byte, size int) int6 /* create new top level table */ t := new(table) t.size = 1 - copysha1(&t.items[0].sha1[0], sha1) + copyhash(&t.items[0].hash[0], hash) t.items[0].offset = off t.items[0].child = *toff t.items[1].child = rc @@ -433,7 +435,7 @@ func (d *DB) insertTopLevel(toff *int64, sha1 *byte, data []byte, size int) int6 return ret } -func (d *DB) lookup(toff int64, sha1 *byte) int64 { +func (d *DB) lookup(toff int64, hash *byte) int64 { if toff == 0 { return 0 } @@ -442,7 +444,7 @@ func (d *DB) lookup(toff int64, sha1 *byte) int64 { left, right := 0, table.size for left < right { mid := (right-left)>>1 + left - switch cmp := cmp(sha1, &table.items[mid].sha1[0]); { + switch cmp := cmp(hash, &table.items[mid].hash[0]); { case cmp == 0: // found ret := table.items[mid].offset @@ -458,26 +460,26 @@ func (d *DB) lookup(toff int64, sha1 *byte) int64 { i := left child := table.items[i].child d.put(table, toff) - return d.lookup(child, sha1) + return d.lookup(child, hash) } -// Insert a new item with key 'sha1' with the contents in 'data' to the +// Insert a new item with key 'hash' with the contents in 'data' to the // database file. -func (d *DB) Insert(csha1 *byte, data []byte) { +func (d *DB) Insert(chash *byte, data []byte) { /* SHA-1 must be in writable memory */ - var sha1 [sha1Size]byte - copysha1(&sha1[0], csha1) + var hash [hashSize]byte + copyhash(&hash[0], chash) - _ = d.insertTopLevel(&d.top, &sha1[0], data, len(data)) + _ = d.insertTopLevel(&d.top, &hash[0], data, len(data)) freeQueued(d) d.flushSuper() } -// Get look up item with the given key 'sha1' in the database file. Length of the +// Get look up item with the given key 'hash' in the database file. Length of the // item is stored in 'len'. Returns a pointer to the contents of the item. // The returned pointer should be released with free() after use. -func (d *DB) Get(sha1 *byte) []byte { - off := d.lookup(d.top, sha1) +func (d *DB) Get(hash *byte) []byte { + off := d.lookup(d.top, hash) if off == 0 { return nil } @@ -495,7 +497,7 @@ func (d *DB) Get(sha1 *byte) []byte { return data[:n] } -// Delete remove item with the given key 'sha1' from the database file. +// Delete remove item with the given key 'hash' from the database file. func (d *DB) Delete(sha1 *byte) error { return errors.New("impl me") } diff --git a/internal/btree/chunk.go b/internal/btree/chunk.go index 0b48e99..4b38c14 100644 --- a/internal/btree/chunk.go +++ b/internal/btree/chunk.go @@ -12,18 +12,12 @@ type chunk struct { const freeQueueLen = 64 -// todo(wdvxdr): move this to btree? -var ( - fqueue [freeQueueLen]chunk - fqueueLen = 0 -) - func freeQueued(bt *DB) { - for i := 0; i < fqueueLen; i++ { - chunk := &fqueue[i] + for i := 0; i < bt.fqueueLen; i++ { + chunk := &bt.fqueue[i] bt.freeChunk(chunk.offset, chunk.len) } - fqueueLen = 0 + bt.fqueueLen = 0 } func (d *DB) allocChunk(size int) int64 { @@ -36,10 +30,10 @@ func (d *DB) allocChunk(size int) int64 { const i32s = unsafe.Sizeof(int32(0)) /* create fake size SHA-1 */ - var sha1 [sha1Size]byte + var sha1 [hashSize]byte p := unsafe.Pointer(&sha1[0]) - *(*int32)(p) = -1 // *(uint32_t *) sha1 = -1; - *(*uint32)(unsafe.Add(p, i32s)) = uint32(size) // ((__be32 *) sha1)[1] = to_be32(size); + *(*int32)(p) = -1 // *(uint32_t *) hash = -1; + *(*uint32)(unsafe.Add(p, i32s)) = uint32(size) // ((__be32 *) hash)[1] = to_be32(size); /* find free chunk with the larger or the same size/SHA-1 */ d.inAllocator = true @@ -47,13 +41,13 @@ func (d *DB) allocChunk(size int) int64 { offset = d.delete(d.freeTop, &sha1[0]) d.deleteLarger = false if offset != 0 { - assert(*(*int32)(p) == -1) // assert(*(uint32_t *) sha1 == (uint32_t) -1) - flen := int(*(*uint32)(unsafe.Add(p, i32s))) // size_t free_len = from_be32(((__be32 *) sha1)[1]) + assert(*(*int32)(p) == -1) // assert(*(uint32_t *) hash == (uint32_t) -1) + flen := int(*(*uint32)(unsafe.Add(p, i32s))) // size_t free_len = from_be32(((__be32 *) hash)[1]) assert(power2(flen) == flen) assert(flen >= size) /* delete buddy information */ - resetsha1(&sha1[0]) + resethash(&sha1[0]) *(*int64)(p) = offset buddyLen := d.delete(d.freeTop, &sha1[0]) assert(buddyLen == int64(size)) @@ -95,28 +89,28 @@ func (d *DB) freeChunk(offset int64, size int) { assert(offset&int64(size-1) == 0) if d.inAllocator { - chunk := &fqueue[fqueueLen] - fqueueLen++ + chunk := &d.fqueue[d.fqueueLen] + d.fqueueLen++ chunk.offset = offset chunk.len = size return } /* create fake offset SHA-1 for buddy allocation */ - var sha1 [sha1Size]byte + var sha1 [hashSize]byte p := unsafe.Pointer(&sha1[0]) d.inAllocator = true const i32s = unsafe.Sizeof(int32(0)) /* add buddy information */ - resetsha1(&sha1[0]) - *(*int32)(p) = -1 // *(uint32_t *) sha1 = -1; - *(*uint32)(unsafe.Add(p, i32s)) = uint32(size) // ((__be32 *) sha1)[1] = to_be32(size); + resethash(&sha1[0]) + *(*int32)(p) = -1 // *(uint32_t *) hash = -1; + *(*uint32)(unsafe.Add(p, i32s)) = uint32(size) // ((__be32 *) hash)[1] = to_be32(size); *(*uint32)(unsafe.Add(p, i32s*2)) = rand.Uint32() /* to make SHA-1 unique */ *(*uint32)(unsafe.Add(p, i32s*3)) = rand.Uint32() - // insert_toplevel(btree, &btree->free_top, sha1, NULL, offset); + // insert_toplevel(btree, &btree->free_top, hash, NULL, offset); _ = d.insertTopLevel(&d.freeTop, &sha1[0], nil, int(offset)) d.inAllocator = false diff --git a/internal/btree/helper.go b/internal/btree/helper.go index a237e55..4a97610 100644 --- a/internal/btree/helper.go +++ b/internal/btree/helper.go @@ -21,7 +21,7 @@ func power2(val int) int { return i } -// helpers for sha1 +// helpers for hash func cmp(a, b *byte) int64 { pa, pb := unsafe.Pointer(a), unsafe.Pointer(b) @@ -29,20 +29,17 @@ func cmp(a, b *byte) int64 { return int64(*(*uint64)(pa) - *(*uint64)(pb)) } pa, pb = unsafe.Add(pa, 8), unsafe.Add(pb, 8) - if *(*uint64)(pa) != *(*uint64)(pb) { - return int64(*(*uint64)(pa) - *(*uint64)(pb)) - } - return int64(*(*uint32)(unsafe.Add(pa, 8)) - *(*uint32)(unsafe.Add(pb, 8))) + return int64(*(*uint64)(pa) - *(*uint64)(pb)) } -func copysha1(dst *byte, src *byte) { +func copyhash(dst *byte, src *byte) { pa, pb := unsafe.Pointer(dst), unsafe.Pointer(src) - *(*[sha1Size]byte)(pa) = *(*[sha1Size]byte)(pb) + *(*[hashSize]byte)(pa) = *(*[hashSize]byte)(pb) } -func resetsha1(sha1 *byte) { +func resethash(sha1 *byte) { p := unsafe.Pointer(sha1) - *(*[sha1Size]byte)(p) = [sha1Size]byte{} + *(*[hashSize]byte)(p) = [hashSize]byte{} } // reading table diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 915cf31..10c165d 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -28,15 +28,12 @@ type Cache struct { db *btree.DB } -// TODO(wdvxdr): cache use md5 key, but btree use sha1 key, -// maybe we can unify to md5 to save some space. - // Insert 添加媒体缓存 func (c *Cache) Insert(md5, data []byte) { c.lock.Lock() defer c.lock.Unlock() - var hash [20]byte + var hash [16]byte copy(hash[:], md5) c.db.Insert(&hash[0], data) } @@ -46,7 +43,7 @@ func (c *Cache) Get(md5 []byte) []byte { c.lock.RLock() defer c.lock.RUnlock() - var hash [20]byte + var hash [16]byte copy(hash[:], md5) return c.db.Get(&hash[0]) }