[問題] 不知道算不算資料結構的問題

看板C_and_CPP作者 (我是我 我不是我)時間13年前 (2012/07/23 09:44), 編輯推噓1(101)
留言2則, 1人參與, 最新討論串1/1
開發平台(Platform): (Ex: VC++, GCC, Linux, ...) GCC 額外使用到的函數庫(Library Used): (Ex: OpenGL, ...) NONE 問題(Question): 大家好,我想問像下面這樣的檔案類型或資料結構,是否有專書介紹? 1.檔案A被產生以後,先allocate 1個基本的block.(以下用BLK1代表). block size=512 Bytes 2.BLK1的空間此struct定義 struct BLK{ U16 parent; //父BLK initial為0 U16 parent_entry; //本身在父BLK裡面所佔的 initial為0 U16 self; //本身的BLK編號,此變數值被initial為1 U16 valid_entry; //本身內含有效的資料比數 initial為0 U32 entry[48+1]; //真正存放資料的array initial為0 struct Daughter[48+2]; //子BLK initial為0 U8 dummy[8]; }; struct Daughter{ U16 daughter; //子BLK編號 U32 descendant_cnt; //子BLK內含資料筆數 }; 3.開始存入資料 當存入第1筆(Count=0)後,BLK1內容如下: parent=0 parent_entry=0 self=1 valid_entry=1 //entry[]內含1筆資料 entry[]= 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,//雖然什麼都看不出來, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,//實際上entry[0]=0 0,0,0,0,0,0,0,0,0, daughter[]= 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 存入到第3筆時(Count=2),BLK1內容如下: parent=0 parent_entry=0 self=1 valid_entry=3 //entry[]內含3筆資料 entry[]= 0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,//entry[0~2]=0,1,2 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, daughter[]= 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 存到第48筆(Count=47),BLK1內容如下: parent=0 parent_entry=0 self=1 valid_entry=48 entry[]= 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19, 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39, 40,41,42,43,44,45,46,47,0, daughter[]= 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 4.存到第49筆時(Count=48),因為BLK1已經滿了,所以這時再 allocate 2個BLK,分別為BLK2,BLK3。3個BLK內容如下: BLK1 parent=2 //BLK2當作BLK1與BLK3的父BLK parent_entry=0 //自己在父BLK內daughter[]的位置 self=1 valid_entry=24 //資料分一半出去到BLK2及BLK3 entry[]= //內容雖不變但其實只有0~23有效 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19, 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39, 40,41,42,43,44,45,46,47,48, daughter[]= 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, BLK2 parent=0 //成為BLK1與BLK3的父BLK parent_entry=0 self=2 //本身的BLK編號 valid_entry=1 //內含一筆有效資料 entry[]= //即只有第24筆資料放在BLK2 24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, daughter[]= //紀錄子BLK1含24筆, 子BLK3也含24筆資料 1_24,3_24,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, BLK3 parent=2 //BLK2當作BLK1與BLK3的父BLK parent_entry=1 //自己在父BLK內daughter[]的位置 self=3 //本身的BLK編號 valid_entry=24 //內含有效資料數目 entry[]= //原本BLK1內的資料25~48改到此.(後面無意義) 25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44, 45,46,47,48,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39, 40,41,42,43,44,45,46,47,48, daughter[]= 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 簡單的說就是變成以下這樣 BLK2 (1,24) / \ BLK1 BLK3 (24,0~23) (24,25~48) 5.資料繼續增加,會放到BLK3,當存到第73筆資料時(Count=72), 再次allocate一Block,BLK4。此時各BLK內容如下: BLK1-從Count=48時到現在都沒有變過 BLK2 parent=0 parent_entry=0 self=2 valid_entry=2 //BLK3 split時,資料49改存放到BLK2 entry[]= 24,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, daughter[]= //又多了BLK4一個子BLK 1_24,3_24,4_24,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, BLK3 parent=2 parent_entry=1 self=3 valid_entry=24 //和當初BLK1一樣,資料分一半出去到BLK2,BLK4 entry[]= //只有資料25~48有意義 25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44, 45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64, 65,66,67,68,69,70,71,72,73, daughter[]= 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, BLK4 parent=2 //為BLK2的子BLK parent_entry=2 //自己在父BLK內daughter[]的位置 self=4 //本身的BLK編號 valid_entry=24 //內含有效資料數目 entry[]= //原本BLK1內的資料50~73改到此.(後面無意義) 50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69, 70,71,72,73,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64, 65,66,67,68,69,70,71,72,73, daughter[]= 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0,0_0, 族譜變這樣 BLK2 (2,24,49) / | \ BLK1 BLK3 BLK4(24,50~73) (24,0~23)(24,25~48) 以下的推演可知將來會變這樣 BLK1 內含0~23 BLK2 內含24,49,74,99,124,... BLK3 內含25~48 BLK4 內含50~73 BLK5 內含75~98 BLK6 內含100~123 BLK7 內含125~148 ... 以上的敘述其實都是看source code以及實際跑一些模擬而歸納出來的。 請問這種檔案資料存放的規則是否有專書介紹? 有數學公式可以預測每一步? 如果BLK2內的資料數超過48而要分割時,會變成怎樣? BLK內資料刪除時會怎樣? 還有一直沒提到的就是其實這個檔案A內裡面每個資料是另一種類型檔案B內資料 的索引。其實都是排序過的,如果檔案B內的資料重新排序,B本身內容不動, 其實是A的內容改變。所以上面的例子其實是在一種簡化的情形下的case。 也就是檔案B資料增加時,都沒有影響到排序,所以好像每個BLK split完就沒事了。 真實的情況更加複雜,也就是有資料會被再加入前面的BLK,造成BLK第二次塞滿 需要分割。所以檔案A每個BLK故意只放半滿就是為了保持這種彈性。 -- ※ 發信站: 批踢踢實業坊(ptt.cc) ◆ From: 59.124.0.175 ※ 編輯: WillyLin 來自: 59.124.0.175 (07/23 09:51) ※ 編輯: WillyLin 來自: 59.124.0.175 (07/23 10:20)

07/23 10:42, , 1F
看起來像資料庫存取常用的 B-tree...
07/23 10:42, 1F

07/23 10:45, , 2F
仔細回想一下應該是所謂的 B+ tree 的樣子
07/23 10:45, 2F
文章代碼(AID): #1G3AnooC (C_and_CPP)