#include #include #include "mdb.h" #include "impl.h" static void* mdb_bsearch(void *base0, ulong nel, ulong width, int (*compar)(void*, void*), void *key) { char *base; int lim, cmp; void *p; base = base0; for(lim = nel; lim != 0; lim >>= 1){ p = base + (lim >> 1) * width; cmp = compar(key, p); if(cmp == 0) return p; if(cmp > 0){ base = (char*)p + width; lim--; } } return nil; } KeyPair* mdb_search_leaf(Leaf *leaf, Key *key) { if(leaf->n == 0) return nil; return mdb_bsearch(leaf->keypairs, leaf->n, sizeof(leaf->keypairs[0]), mdb_keycmp, key); } static int slot_for_key(Key *keys, int nkeys, Key *key) { int i; for(i = 0; i < nkeys && mdb_keycmp(key, &keys[i]) > 0; i++) ; return i; } typedef struct MDBCursor MDBCursor; struct MDBCursor { Page *p; int index; }; /* * walk branches/leaves looking for key. * return 0 if found and -1 if not found. * if found, populate KeyPair.pid and KeyPair.length. */ int mdb_tree_search(MDB *db, KeyPair *search) { u32int pid, slot; Page *p; KeyPair *kp; pid = db->meta.root; for(;;){ p = mdb_readpage(db, pid, T_ANY); switch(p->type){ default: sysfatal("bad page %ud type %ud in search", p->id, p->type); err: mdb_putpage(db, p); return -1; case T_LEAF: kp = mdb_search_leaf(&p->leaf, &search->key); if(kp == nil) goto err; search->pid = kp->pid; search->length = kp->length; mdb_putpage(db, p); return 0; case T_BRANCH: slot = slot_for_key(p->branch.keys, p->branch.n, &search->key); pid = p->branch.child[slot]; mdb_putpage(db, p); break; } } } static void mdb_print_branch(Page *p) { u32int i; Branch *b; assert(p->type == T_BRANCH); b = &p->branch; fprint(2, "===== branch page=%ud n=%ud\n", p->id, b->n); for(i = 0; i < b->n; i++){ fprint(2, " %ud child page=%ud\n", i, b->child[i]); fprint(2, " %ud key %.*s\n", i, b->keys[i].len, b->keys[i].data); } fprint(2, " %ud child page=%ud\n", i, b->child[i]); } static void mdb_print_leaf(Page *p) { u32int i; Leaf *l; assert(p->type == T_LEAF); l = &p->leaf; fprint(2, "===== leaf page=%ud n=%ud next=%ud\n", p->id, l->n, l->next); for(i = 0; i < l->n; i++){ fprint(2, " %ud %.*s page=%ud length=%ud\n", i, l->keypairs[i].key.len, l->keypairs[i].key.data, l->keypairs[i].pid, l->keypairs[i].length); } } static void mdb_tree_split_child_branch(MDB *db, Page *up, int i, Page *pleft, Page *pright) { u32int n; Branch *left, *right; assert(up->type == T_BRANCH); assert(pleft->type == T_BRANCH); assert(pright->type == T_BRANCH); left = &pleft->branch; right = &pright->branch; assert(left->n == KEYSPERBRANCH); n = left->n/2; /* move right half of left into right */ memmove(&right->keys[0], &left->keys[n+1], n*sizeof(right->keys[0])); right->n = n; left->n = n + 1; /* stuff right in up's children */ memmove(&up->branch.child[i+1], &up->branch.child[i+2], (up->branch.n-i)*sizeof(up->branch.child[0])); up->branch.child[i+1] = pright->id; /* stuff middle of left in up's keys */ memmove(&up->branch.keys[i], &up->branch.keys[i+1], (up->branch.n-i)*sizeof(up->branch.keys[0])); up->branch.keys[i] = right->keys[0]; up->branch.n++; } static void mdb_tree_split_child_leaf(MDB *db, Page *up, int i, Page *pleft, Page *pright) { u32int n; Leaf *left, *right; assert(up->type == T_BRANCH); assert(pleft->type == T_LEAF); assert(pright->type == T_LEAF); left = &pleft->leaf; right = &pright->leaf; assert(left->n == KEYSPERLEAF); n = left->n/2; /* move right half of left into right */ memmove(&right->keypairs[0], &left->keypairs[n+1], n*sizeof(right->keypairs[0])); right->n = n; left->n = n + 1; /* stuff right in up's children */ memmove(&up->branch.child[i+1], &up->branch.child[i+2], (up->branch.n-i)*sizeof(up->branch.child[0])); up->branch.child[i+1] = pright->id; /* stuff middle of left in up's keys */ memmove(&up->branch.keys[i], &up->branch.keys[i+1], (up->branch.n-i)*sizeof(up->branch.keys[0])); up->branch.keys[i] = right->keypairs[0].key; up->branch.n++; } /* split i'th child of branch up */ static void mdb_tree_split_child(MDB *db, Page *up, int i) { Page *left, *right; u32int lpid, rpid; assert(up->type == T_BRANCH); rpid = mdb_allocate(db); right = mdb_getpage(db); right->id = rpid; lpid = up->branch.child[i]; left = mdb_readpage(db, lpid, T_ANY); fprint(2, "split up=%ud left=%ud right=%ud\n", up->id, lpid, rpid); switch(left->type){ default: mdb_error_corrupt(db); break; case T_LEAF: right->type = T_LEAF; mdb_tree_split_child_leaf(db, up, i, left, right); break; case T_BRANCH: right->type = T_BRANCH; mdb_tree_split_child_branch(db, up, i, left, right); break; } mdb_writepage(db, left); mdb_writepage(db, right); } static Page* mdb_getroot(MDB *db) { Page *r; fprint(2, "mdb_getroot: reading root page %ud\n", db->meta.root); r = mdb_readpage(db, db->meta.root, T_ANY); if(r->type != T_BRANCH && r->type != T_LEAF) mdb_error_corrupt(db); return r; } /* put key in a leaf. leaf page is not full. */ void mdb_tree_insert_leaf_nonfull(Leaf *leaf, KeyPair *keypair) { int i; assert(leaf->n < KEYSPERLEAF); for(i = 0; i < leaf->n && mdb_keycmp(&keypair->key, &leaf->keypairs[i].key) > 0; i++) ; memmove(&leaf->keypairs[i+1], &leaf->keypairs[i], (leaf->n - i) * sizeof(leaf->keypairs[0])); leaf->keypairs[i] = *keypair; leaf->n++; } int mdb_tree_insert_branch_nonfull(MDB *db, Page *bpage, KeyPair *keypair) { int i, rv; Branch *branch; Page *p; assert(bpage->type == T_BRANCH); branch = &bpage->branch; assert(branch->n <= KEYSPERBRANCH); rv = 0; i = slot_for_key(branch->keys, branch->n, &keypair->key); fprint(2, "mdb_tree_insert_branch_nonfull: read %ud\n", branch->child[i]); p = mdb_readpage(db, branch->child[i], T_ANY); switch(p->type){ default: sysfatal("mdb_tree_insert_branch_nonfull: not a b+ tree node"); break; case T_LEAF: if(p->leaf.n == KEYSPERLEAF){ mdb_putpage(db, p); mdb_tree_split_child(db, bpage, i); return mdb_tree_insert_branch_nonfull(db, bpage, keypair); } mdb_tree_insert_leaf_nonfull(&p->leaf, keypair); mdb_writepage(db, p); break; case T_BRANCH: if(p->branch.n == KEYSPERBRANCH) sysfatal("tried to insert into full branch"); rv = mdb_tree_insert_branch_nonfull(db, p, keypair); mdb_writepage(db, p); break; } return rv; } /* root is full, grow tree by height 1 */ static void mdb_reroot(MDB *db) { Page *p; u32int pid; pid = mdb_allocate(db); p = mdb_getpage(db); p->id = pid; p->type = T_BRANCH; p->branch.n = 0; p->branch.child[0] = db->meta.root; fprint(2, "mdb_reroot: old root=%ud new root=%ud\n", db->meta.root, p->id); db->meta.root = pid; mdb_syncmeta(db); mdb_tree_split_child(db, p, 0); mdb_writepage(db, p); } int mdb_tree_insert(MDB *db, KeyPair *keypair) { Page *p; p = mdb_getroot(db); switch(p->type){ case T_LEAF: if(p->leaf.n >= KEYSPERLEAF){ /* split root if full */ mdb_putpage(db, p); mdb_reroot(db); return mdb_tree_insert(db, keypair); } mdb_tree_insert_leaf_nonfull(&p->leaf, keypair); mdb_writepage(db, p); break; case T_BRANCH: if(p->branch.n >= KEYSPERBRANCH){ /* split root if full */ mdb_putpage(db, p); mdb_reroot(db); return mdb_tree_insert(db, keypair); } if(mdb_tree_insert_branch_nonfull(db, p, keypair) < 0){ mdb_putpage(db, p); return -1; } mdb_writepage(db, p); break; } return 0; }