[問題] 不知道算不算資料結構的問題
開發平台(Platform): (Ex: VC++, GCC, Linux, ...)
GCC
額外使用到的函數庫(Library Used): (Ex: OpenGL, ...)
NONE
問題(Question):
大家好,我想問像下面這樣的檔案類型或資料結構,是否有專書介紹?
1.檔案A被產生以後,先allocate 1個基本的block.(以下用BLK1代表).
block size=512 Bytes
2.BLK1的空間此struct定義
struct BLK{
U16 parent; //父BLK initial為0
U16 parent_entry; //本身在父BLK裡面所佔的 initial為0
U16 self; //本身的BLK編號,此變數值被initial為1
U16 valid_entry; //本身內含有效的資料比數 initial為0
U32 entry[48+1]; //真正存放資料的array initial為0
struct Daughter[48+2]; //子BLK initial為0
U8 dummy[8];
};
struct Daughter{
U16 daughter; //子BLK編號
U32 descendant_cnt; //子BLK內含資料筆數
};
3.開始存入資料
當存入第1筆(Count=0)後,BLK1內容如下:
parent=0
parent_entry=0
self=1
valid_entry=1 //entry[]內含1筆資料
entry[]=
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,//雖然什麼都看不出來,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,//實際上entry[0]=0
0,0,0,0,0,0,0,0,0,
daughter[]=
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
存入到第3筆時(Count=2),BLK1內容如下:
parent=0
parent_entry=0
self=1
valid_entry=3 //entry[]內含3筆資料
entry[]=
0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,//entry[0~2]=0,1,2
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,
daughter[]=
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
存到第48筆(Count=47),BLK1內容如下:
parent=0
parent_entry=0
self=1
valid_entry=48
entry[]=
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,
20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,0,
daughter[]=
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
4.存到第49筆時(Count=48),因為BLK1已經滿了,所以這時再
allocate 2個BLK,分別為BLK2,BLK3。3個BLK內容如下:
BLK1
parent=2 //BLK2當作BLK1與BLK3的父BLK
parent_entry=0 //自己在父BLK內daughter[]的位置
self=1
valid_entry=24 //資料分一半出去到BLK2及BLK3
entry[]= //內容雖不變但其實只有0~23有效
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,
20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,48,
daughter[]=
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
BLK2
parent=0 //成為BLK1與BLK3的父BLK
parent_entry=0
self=2 //本身的BLK編號
valid_entry=1 //內含一筆有效資料
entry[]= //即只有第24筆資料放在BLK2
24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,
daughter[]= //紀錄子BLK1含24筆, 子BLK3也含24筆資料
1_24,3_24,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
BLK3
parent=2 //BLK2當作BLK1與BLK3的父BLK
parent_entry=1 //自己在父BLK內daughter[]的位置
self=3 //本身的BLK編號
valid_entry=24 //內含有效資料數目
entry[]= //原本BLK1內的資料25~48改到此.(後面無意義)
25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,
45,46,47,48,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,48,
daughter[]=
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
簡單的說就是變成以下這樣
BLK2
(1,24)
/ \
BLK1 BLK3
(24,0~23) (24,25~48)
5.資料繼續增加,會放到BLK3,當存到第73筆資料時(Count=72),
再次allocate一Block,BLK4。此時各BLK內容如下:
BLK1-從Count=48時到現在都沒有變過
BLK2
parent=0
parent_entry=0
self=2
valid_entry=2 //BLK3 split時,資料49改存放到BLK2
entry[]=
24,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,
daughter[]= //又多了BLK4一個子BLK
1_24,3_24,4_24,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
BLK3
parent=2
parent_entry=1
self=3
valid_entry=24 //和當初BLK1一樣,資料分一半出去到BLK2,BLK4
entry[]= //只有資料25~48有意義
25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,
45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,
65,66,67,68,69,70,71,72,73,
daughter[]=
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
BLK4
parent=2 //為BLK2的子BLK
parent_entry=2 //自己在父BLK內daughter[]的位置
self=4 //本身的BLK編號
valid_entry=24 //內含有效資料數目
entry[]= //原本BLK1內的資料50~73改到此.(後面無意義)
50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,
70,71,72,73,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,
65,66,67,68,69,70,71,72,73,
daughter[]=
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,
族譜變這樣
BLK2
(2,24,49)
/ | \
BLK1 BLK3 BLK4(24,50~73)
(24,0~23)(24,25~48)
以下的推演可知將來會變這樣
BLK1 內含0~23
BLK2 內含24,49,74,99,124,...
BLK3 內含25~48
BLK4 內含50~73
BLK5 內含75~98
BLK6 內含100~123
BLK7 內含125~148
...
以上的敘述其實都是看source code以及實際跑一些模擬而歸納出來的。
請問這種檔案資料存放的規則是否有專書介紹?
有數學公式可以預測每一步?
如果BLK2內的資料數超過48而要分割時,會變成怎樣?
BLK內資料刪除時會怎樣?
還有一直沒提到的就是其實這個檔案A內裡面每個資料是另一種類型檔案B內資料
的索引。其實都是排序過的,如果檔案B內的資料重新排序,B本身內容不動,
其實是A的內容改變。所以上面的例子其實是在一種簡化的情形下的case。
也就是檔案B資料增加時,都沒有影響到排序,所以好像每個BLK split完就沒事了。
真實的情況更加複雜,也就是有資料會被再加入前面的BLK,造成BLK第二次塞滿
需要分割。所以檔案A每個BLK故意只放半滿就是為了保持這種彈性。
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 59.124.0.175
※ 編輯: WillyLin 來自: 59.124.0.175 (07/23 09:51)
※ 編輯: WillyLin 來自: 59.124.0.175 (07/23 10:20)
推
07/23 10:42, , 1F
07/23 10:42, 1F
→
07/23 10:45, , 2F
07/23 10:45, 2F