B+树在内存的实现
一个月前看完uCore内核后,决定写一个B 树在内存中的实现巩固一下在内核代码中学习到的一些编码技巧,B 树常用于组织起数据库的索引,且叶子节点包含真实内容,可以很方便地进行区间查找,比如select mkey > 5 in table之类的,通过叶子节点的链表就可以获取后面的数据了,由于磁盘中块的大小(大多数情况下pageSize = 4096),可以让树的中间节点包含多个子节点以使每个BNode结构体在磁盘上填满每页。从而减少磁盘io,提高运行效率。
代码中的英文注释是我自己写的,可能有一些语法或者词性方面的错误(很久没有写英文了)。
我定义的BNode结构体(在内存中),其中n1为子节点个数,isDirty为相对于磁盘数据是否要写回磁盘。List_entry为组织叶子节点的双链表(该技巧从ucore内核中学来的,如果list_entry用其他数据结构组织起来(比如红黑树),那么BNode就会被红黑树组织起来,该技巧可以减少代码量。)
-
struct BNode
-
{
-
unsigned n1 : 10;
-
unsigned isLeaf : 1;
-
unsigned isDirty : 1;
-
-
//unsigned int n1;
-
//bool isLeaf;
-
BNode* fa;
-
List_entry leaflist;
-
BNode* childp[2 * t];//which contains the pointer of child node
-
//represent the block num of child(point to data block if leaf)
-
int ckey[2 * t];//represent the key for parition child(ckey[i] == the least key of corresponding child i)
-
BNode() :fa(NULL), isLeaf(true), n1(0), isDirty(0)
-
{
-
-
};
-
~BNode()
-
{
-
if (isDirty)
-
{
-
//printf("page write block num %u...\n", this);//WriteBlock func called,copy content from memory to hard disk
-
isDirty = 0;
-
}
-
//delete leaflist;
-
};
-
};
其中BNode中childp虽然存的是子节点的虚地址,实际应用时应该存的是子节点所在的磁盘block num,通过ReadBlock将对应的BNode的读入内存中,然后填入内存中new的一个BNode。(List_entry中prev,nxt也是同理)
ReadBlock最理想的状态是第一次由系统调用int 80触发,后面页置换出去的时候填入对应的磁盘块号,而后由14号中断(缺页)触发调入内存。
t的选择应该使得BNode的大小 == pageSize,但为了测试可以把t设的小一些(>=3).
经测试该B树叶子节点排序没有什么问题,不过中间有没有退化就不知道了,应该没有吧,毕竟我也没有怎么看B 树的定义,具体的思想可以看我写的注释。
-
#include <stdio.h>
-
#include <stdlib.h>
-
#include <assert.h>
-
#include <set>
-
#pragma warning(disable:4996)
-
#define size_int sizeof(int)
-
typedef unsigned int uint;
-
-
using namespace std;
-
-
const static int pageSize = 4096;
-
//const static int t = (pageSize - 96)/ size_int / 4;
-
const static int t = 3;
-
int mtop;
-
const static int bign = 105;
-
int mstack[bign];
-
const static int maxd = 2 * t - 1;
-
const static int mind = t - 1;
-
struct List_entry
-
{
-
List_entry *prev, *nxt;
-
}*mhead;
-
static int inMemory = 0;
-
struct BNode
-
{
-
unsigned n1 : 10;
-
unsigned isLeaf : 1;
-
unsigned isDirty : 1;
-
-
//unsigned int n1;
-
//bool isLeaf;
-
BNode* fa;
-
List_entry leaflist;
-
BNode* childp[2 * t];//which contains the pointer of child node
-
//represent the block num of child(point to data block if leaf)
-
int ckey[2 * t];//represent the key for parition child(ckey[i] == the least key of corresponding child i)
-
BNode() :fa(NULL), isLeaf(true), n1(0), isDirty(0)
-
{
-
-
};
-
~BNode()
-
{
-
if (isDirty)
-
{
-
//printf("page write block num %u...\n", this);//WriteBlock func called,copy content from memory to hard disk
-
isDirty = 0;
-
}
-
//delete leaflist;
-
};
-
};
-
BNode *Troot = NULL, *NodeTemplate;
-
-
bool msplit(BNode* bnode);
-
-
inline BNode* to_struct(List_entry* leafEntry)
-
{
-
if (Troot == NULL)
-
Troot = new BNode();
-
return (BNode*)((char*)leafEntry - ((char*)(&(Troot->leaflist)) - (char*)Troot));
-
}
-
-
inline void listAdd(List_entry* prevn, List_entry* nextn, List_entry* dataentry)
-
{
-
dataentry->prev = prevn;
-
dataentry->nxt = nextn;
-
nextn->prev = dataentry;
-
prevn->nxt = dataentry;
-
}
-
-
inline void listDel(List_entry* dataentry)
-
{
-
dataentry->prev->nxt = dataentry->nxt;
-
dataentry->nxt->prev = dataentry->prev;
-
}
-
-
inline void list_AddAfter(List_entry* listentry, List_entry* dataentry)
-
{
-
List_entry* tnxt = listentry->nxt;
-
listAdd(listentry, tnxt, dataentry);
-
}
-
-
inline BNode* prevB(BNode* u)
-
{
-
assert(u->isLeaf);
-
if (u->leaflist.prev != mhead)
-
{
-
return to_struct(u->leaflist.prev);
-
}
-
else
-
{
-
return NULL;
-
}
-
}
-
-
inline BNode* nextB(BNode* u)
-
{
-
assert(u->isLeaf);
-
if (u->leaflist.nxt != mhead)
-
{
-
return to_struct(u->leaflist.nxt);
-
}
-
else
-
{
-
return NULL;
-
}
-
}
-
-
struct Block
-
{
-
char bdata[pageSize];
-
};
-
-
BNode* getBlock()
-
{
-
return (BNode*)(rand() % 65536);//can be replaced by an unique num
-
}
-
//Block data if read into memory.
-
-
//BTree height increasing or decreasing only when the root node split or merge
-
void ReadBlock(BNode *blocknum = 0)//using virtual addr of BNode to represent the corresponding block.
-
{
-
//if blocknum not in memory
-
//in best case,triggered by int 14 except first time read(int 80)
-
//printf("page read %u...\n", blocknum);
-
inMemory ;
-
}
-
-
void WriteBlock(BNode *blocknum = 0)
-
{
-
//if blocknum is dirty
-
if (blocknum != NULL && blocknum->isDirty == 1)
-
{
-
//printf("page write %d...\n", blocknum);
-
//inMemory--;
-
blocknum->isDirty = 0;
-
}
-
}
-
-
-
void init()
-
{
-
srand(11);
-
Troot = new BNode();
-
/*
-
Troot->isLeaf = true;
-
Troot->fa = NULL;
-
Troot->n1 = 0;
-
Troot->leaflist = new List_entry();*/
-
mhead = new List_entry();
-
mhead->prev = mhead->nxt = mhead;
-
list_AddAfter(mhead, &(Troot->leaflist));
-
}
-
-
inline bool mtransfer(BNode* snode, BNode* dnode, int childno, int mflag) //matching with left rotate and right rotate in binary tree
-
{
-
if (!mflag)//mflag == 0,represent transferring to prev node
-
{
-
uint nd = dnode->n1;
-
dnode->childp[nd] = snode->childp[0];
-
dnode->ckey[nd] = snode->ckey[0];
-
if (!dnode->isLeaf)
-
dnode->childp[nd]->fa = dnode;
-
dnode->n1 ;
-
snode->n1--;
-
for (uint i = 0; i < snode->n1; i )
-
{
-
snode->childp[i] = snode->childp[i 1];
-
snode->ckey[i] = snode->ckey[i 1];
-
}
-
BNode* tfa = snode->fa;
-
ReadBlock(tfa);
-
tfa->ckey[childno] = snode->ckey[0];
-
tfa->isDirty = 1;
-
}
-
else //mflag == 1,represent transferring to next node
-
{
-
snode->n1--;
-
dnode->n1 ;
-
for (uint i = dnode->n1 - 1; i > 0; i--)
-
{
-
dnode->ckey[i] = dnode->ckey[i - 1];
-
dnode->childp[i] = dnode->childp[i - 1];
-
}
-
int sn1 = snode->n1;
-
BNode* tfa = snode->fa;
-
ReadBlock(tfa);
-
tfa->ckey[childno 1] = dnode->ckey[0] = snode->ckey[sn1];
-
dnode->childp[0] = snode->childp[sn1];
-
if (!dnode->isLeaf)
-
dnode->childp[0]->fa = dnode;
-
tfa->isDirty = 1;
-
}
-
snode->isDirty = 1;
-
dnode->isDirty = 1;
-
// WriteBlock(snode);
-
// WriteBlock(dnode);
-
return false;
-
}
-
-
inline bool msplit(BNode *bnode)
-
{
-
BNode* nxtbnode = new BNode();
-
bnode->n1 /= 2;
-
int bn1 = bnode->n1;
-
nxtbnode->n1 = bn1;
-
nxtbnode->isLeaf = bnode->isLeaf;//
-
for (int i = 0; i < bn1; i )
-
{
-
nxtbnode->ckey[i] = bnode->ckey[i bn1];
-
nxtbnode->childp[i] = bnode->childp[i bn1];
-
if (!nxtbnode->isLeaf)
-
nxtbnode->childp[i]->fa = nxtbnode;
-
}
-
-
if(bnode->isLeaf == 1)
-
list_AddAfter(&bnode->leaflist, &nxtbnode->leaflist);
-
int childno = 0;
-
if (bnode->fa == NULL)
-
{
-
Troot = new BNode();
-
bnode->fa = Troot;
-
Troot->ckey[0] = bnode->ckey[0];
-
Troot->childp[0] = bnode;
-
Troot->n1 ;
-
Troot->isLeaf = false;
-
}
-
else
-
{
-
childno = mstack[mtop - 1];
-
}
-
BNode *tfa = nxtbnode->fa = bnode->fa;
-
ReadBlock(tfa);
-
tfa->n1 ;
-
for(uint i = tfa->n1 - 1; i > childno 1 ; i--)
-
{
-
tfa->ckey[i] = tfa->ckey[i - 1];
-
tfa->childp[i] = tfa->childp[i - 1];
-
}
-
tfa->ckey[childno 1] = nxtbnode->ckey[0];
-
tfa->childp[childno 1] = nxtbnode;
-
//nxtbnode->fa = tfa;
-
return true;
-
}
-
-
-
-
inline void mmerge(BNode* u, BNode* v)
-
{
-
int tun = u->n1;
-
u->n1 = v->n1;
-
for (int i = tun; i < u->n1; i )
-
{
-
u->childp[i] = v->childp[i - tun];
-
u->ckey[i] = v->ckey[i - tun];
-
if (!u->isLeaf)
-
u->childp[i]->fa = u;
-
}
-
u->isDirty = 1;
-
if (v->isLeaf)
-
{
-
listDel(&v->leaflist);
-
}
-
delete v;
-
//free space Bnode v occupied on disk
-
inMemory--;
-
-
BNode* ufa = u->fa;
-
ReadBlock(ufa);
-
int chno = mstack[mtop - 1];
-
ufa->n1--;
-
for (int i = chno 1; i < ufa->n1; i )
-
{
-
ufa->childp[i] = ufa->childp[i 1];
-
ufa->ckey[i] = ufa->ckey[i 1];
-
}
-
//ufa->ckey[chno] = u->ckey[0];
-
ufa->isDirty = 1;
-
return;
-
}
-
-
bool DealWithNode(BNode* &u, bool isinsert = true)
-
{
-
int chno = mstack[mtop - 1];
-
BNode* ufa = u->fa;
-
if (ufa != NULL)
-
{
-
ReadBlock(ufa);
-
if(!isinsert)
-
ufa->ckey[chno] = u->ckey[0];
-
}
-
bool mflag = false;
-
if (isinsert)
-
{
-
if (ufa == NULL)
-
{
-
msplit(u);
-
return true;
-
}
-
int un = ufa->n1;
-
if (chno > 0)
-
{
-
BNode* v = ufa->childp[chno - 1];
-
ReadBlock(v);
-
if (v->n1 < maxd)
-
{
-
mtransfer(u, v, chno, 0);
-
return true;
-
}
-
}
-
if (chno < un - 1)
-
{
-
BNode* v = ufa->childp[chno 1];
-
ReadBlock(v);
-
if (v->n1 < maxd)
-
{
-
mtransfer(u, v, chno, 1);
-
return true;
-
}
-
}
-
msplit(u);
-
return false;
-
}
-
else
-
{
-
int un = ufa->n1;
-
BNode* v = NULL;
-
int tflag = 0;
-
if (chno > 0)
-
{
-
v = ufa->childp[chno - 1];
-
ReadBlock(v);
-
if (v->n1 > mind)
-
{
-
mtransfer(v, u, chno - 1, 1);
-
return true;
-
}
-
}
-
if (chno < un - 1)
-
{
-
v = ufa->childp[chno 1];
-
tflag = 1;
-
ReadBlock(v);
-
if (v->n1 > mind)
-
{
-
mtransfer(v, u, chno 1, 0);
-
return true;
-
}
-
}
-
-
if (tflag == 0)
-
{
-
BNode* tmp = u;
-
u = v;
-
v = tmp;
-
mstack[mtop - 1]--;
-
//ufa->ckey[mstack[mtop - 1]] = u->ckey[0];
-
}
-
-
mmerge(u, v);
-
-
if (ufa->n1 == 1 && ufa == Troot)
-
{
-
delete Troot;
-
Troot = u;
-
}
-
if (u == Troot)
-
return true;
-
-
return false;
-
}
-
}
-
-
void BInsert(int mkey)
-
{
-
BNode* u = Troot;
-
-
mtop = 1;
-
while (true)
-
{
-
ReadBlock(u);
-
if (u->isLeaf)
-
{
-
uint i = 0;
-
for (i = 0; i < u->n1; i )
-
{
-
assert(mkey != u->ckey[i]);
-
if (u->ckey[i] > mkey)
-
{
-
break;
-
}
-
}
-
u->n1 ;
-
for (uint j = u->n1 - 1; j > i; j--)
-
{
-
u->childp[j] = u->childp[j - 1];
-
u->ckey[j] = u->ckey[j - 1];
-
}
-
u->childp[i] = getBlock();
-
u->ckey[i] = mkey;
-
u->isDirty = 1;
-
/*
-
if (u->n1 > maxd)
-
{
-
bool flag = false;
-
if (!flag)
-
{
-
BNode *tBNode = prevB(u);
-
if (tBNode != NULL && tBNode->fa == u->fa && tBNode->n1 < maxd)
-
{
-
ReadBlock(u->fa);
-
mtransfer(u, tBNode, 0);
-
flag = true;
-
}
-
}
-
-
if (!flag)
-
{
-
BNode* tBNode = nextB(u);
-
if (tBNode != NULL && tBNode->fa == u->fa && tBNode->n1 < maxd)
-
{
-
ReadBlock(u->fa);
-
mtransfer(u, tBNode, 1);
-
flag = true;
-
}
-
}
-
-
if (!flag)
-
{
-
//ReadBlock(u->fa);
-
msplit(u);
-
//WriteBlock(u->fa);
-
}
-
-
}
-
WriteBlock(u);*/
-
break;
-
}
-
else
-
{
-
uint i = 0;
-
for (i = 0; i < u->n1; i )
-
{
-
assert(mkey != u->ckey[i]);
-
if (u->ckey[i] > mkey)
-
{
-
if (i > 0)
-
i--;
-
break;
-
}
-
}
-
if (i == u->n1 && i > 0)
-
i--;
-
if (i == 0)
-
{
-
if (mkey < u->ckey[i])
-
{
-
u->ckey[i] = mkey;
-
u->isDirty = 1;
-
//WriteBlock(u);
-
}
-
}
-
mstack[mtop ] = i;
-
u = u->childp[i];
-
}
-
}
-
//u = u->fa;
-
-
while(u != NULL)
-
{
-
ReadBlock(u);
-
-
if (u->n1 <= maxd)
-
{
-
// WriteBlock(u);
-
break;
-
}
-
bool flag = false;
-
flag = DealWithNode(u, true);
-
-
if (!flag)
-
u = u->fa;
-
else
-
break;
-
mtop--;
-
}
-
-
}
-
inline void fixchno(BNode *u)
-
{
-
while (u != Troot)
-
{
-
int chno = mstack[mtop - 1];
-
BNode *ufa = u->fa;
-
ReadBlock(ufa);
-
assert(ufa->childp[chno] == u);
-
if (ufa->ckey[chno] == u->ckey[0])
-
{
-
break;
-
}
-
else
-
{
-
ufa->ckey[chno] = u->ckey[0];
-
ufa->isDirty = 1;
-
//WriteBlock(ufa);
-
}
-
u = ufa;
-
mtop--;
-
}
-
}
-
-
void BDelete(int mkey)
-
{
-
BNode* u = Troot;
-
-
mtop = 1;
-
while (true)
-
{
-
ReadBlock(u);
-
if (u->isLeaf)
-
{
-
uint i = 0;
-
for (i = 0; i < u->n1; i )
-
{
-
if (u->ckey[i] == mkey)
-
{
-
break;
-
}
-
}
-
u->n1--;
-
for (uint j = i; j < u->n1; j )
-
{
-
u->childp[j] = u->childp[j 1];
-
u->ckey[j] = u->ckey[j 1];
-
}
-
u->isDirty = 1;
-
break;
-
}
-
else
-
{
-
uint i = 0;
-
for (i = 0; i < u->n1; i )
-
{
-
if (u->ckey[i] > mkey)
-
break;
-
}
-
assert(i > 0);
-
i--;
-
mstack[mtop ] = i;
-
u = u->childp[i];
-
-
}
-
}
-
while (u != Troot)
-
{
-
ReadBlock(u);
-
-
if (u->n1 >= mind)
-
{
-
WriteBlock(u);
-
fixchno(u);
-
break;
-
}
-
bool flag = false;
-
//BNode* oldu = u;
-
flag = DealWithNode(u, false);
-
//assert(oldu == u);
-
-
u = u->fa;
-
mtop--;
-
if (flag)
-
{
-
fixchno(u);
-
break;
-
}
-
}
-
}
-
const int bigm = 1000000;
-
//int mark[bign];
-
set<int> used_set;
-
inline int getrand()
-
{
-
return (rand() * 32767ll rand()) % bigm;
-
}
-
void Btest()
-
{
-
used_set.clear();
-
int nown = 0;
-
for (int i = 0; i < bigm; i )
-
{
-
if (nown < 5000)
-
{
-
int x = getrand();
-
int cnt = 0;
-
for (int j = x; j != x - 1; j = (j 1) % bigm)
-
{
-
cnt ;
-
-
if (used_set.count(j) == 0)
-
{
-
used_set.insert(j);
-
nown ;
-
BInsert(j);
-
break;
-
}
-
else if(cnt > 10)
-
{
-
BDelete(j);
-
used_set.erase(j);
-
nown--;
-
}
-
}
-
}
-
else
-
{
-
int x = getrand();
-
if (x % 2 == 0)
-
{
-
int cnt = 0;
-
for (int j = x; j != x - 1; j = (j 1) % bigm)
-
{
-
cnt ;
-
if (used_set.count(j) == 0)
-
{
-
used_set.insert(j);
-
BInsert(j);
-
nown ;
-
break;
-
}
-
else if (cnt > 10)
-
{
-
BDelete(j);
-
used_set.erase(j);
-
nown--;
-
break;
-
}
-
}
-
}
-
else
-
{
-
auto it = used_set.lower_bound(x);
-
if (it != used_set.end())
-
{
-
x = *it;
-
}
-
else
-
{
-
x = *used_set.begin();
-
}
-
BDelete(x);
-
used_set.erase(x);
-
nown--;
-
}
-
}
-
if (i > 0 && i % 100 == 0)
-
{
-
int lastval = -1;
-
int cnt = 0;
-
for (List_entry* j = mhead->nxt; j != mhead; j = j->nxt)
-
{
-
BNode* tb = to_struct(j);
-
assert(tb->ckey[0] > lastval);
-
lastval = tb->ckey[tb->n1 - 1];
-
if (rand() % 30 == 0)
-
{
-
for (int k = 1; k < tb->n1; k )
-
{
-
assert(tb->ckey[k] > tb->ckey[k - 1]);
-
}
-
}
-
cnt = tb->n1;
-
}
-
assert(cnt == nown);
-
}
-
}
-
}
-
-
int main()
-
{
-
//int blocknum = 102;
-
//printf("%d\n", sizeof(dxfile));
-
init();
-
Btest();
-
//printf("%d\n", sizeof(BNode));
-
//printf("%d\n", Troot->isLeaf);
-
}
-
//flush all dirty BNode to disk when being exchanged out of memory or when program exited(WriteBlock called)
这份代码刚开始的时候是随便写的,不过后面写着写着发现挺有意思的就比较认真了(还写了测试代码),写B树主要是因为只有它没有写过。学完uCore以后我觉得当年要是读个计算机博士什么的早就毕业了。
这篇好文章是转载于:学新通技术网
- 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
- 本站站名: 学新通技术网
- 本文地址: /boutique/detail/tanhgggcib
-
photoshop保存的图片太大微信发不了怎么办
PHP中文网 06-15 -
《学习通》视频自动暂停处理方法
HelloWorld317 07-05 -
Android 11 保存文件到外部存储,并分享文件
Luke 10-12 -
word里面弄一个表格后上面的标题会跑到下面怎么办
PHP中文网 06-20 -
photoshop扩展功能面板显示灰色怎么办
PHP中文网 06-14 -
微信公众号没有声音提示怎么办
PHP中文网 03-31 -
excel下划线不显示怎么办
PHP中文网 06-23 -
excel打印预览压线压字怎么办
PHP中文网 06-22 -
TikTok加速器哪个好免费的TK加速器推荐
TK小达人 10-01 -
怎样阻止微信小程序自动打开
PHP中文网 06-13