[問題] 把兩組dataset合併一起跑原本的程式的方法?已回收
大家好,想請教一下我現在做的是classification的實驗
是把原本的dataset的圖片分成training set和test set
然後利用training set去做相似度比較後,移除較差影響表現的instance
再拿改進過後的training set給test set用
不過我現在若是想改成,把全部的dataset當作traing set用
利用比較多的data來跑出一個比較好的accuracy
應該要怎麼修改原本的程式呢? 似乎不能夠直接把dataset總數member_num/2不除2即可
因為會造成index大小不夠的問題?
??? Attempted to access index(1,772); index out of bounds because
size(index)=[771,771].
Error in ==> query>cal_Precision at 148
while (ismember(index(src(i),j),dst) == 0)
Error in ==> query at 113
correct = cal_Precision(class, index, testing, training2);
以下部分的code請強者指教
member_num2 = ceil(member_num/2); %% 這邊做的就是把原本dataset分成兩份
但是我不想分成兩份卻不能直接拿掉/2...
for k = 1:10 %% 連續跑十次,因為我想求的是多次亂數分類後的平均
training_testing = create_dataset(inputFile,member_num,class_num);
training1 = find(training_testing == 0);
testing = find(training_testing == 1);
z
% T_F: 列出training set中的每個圖片correct+和false+的數量 (訓練挑選的過程)
T_F = zeros(numFile,2);
for i = 1:size(training1,1)
if (class(index(training1(i),2)) == class(training1(i)))
T_F(index(training1(i),2),1) = T_F(index(training1(i),2),1) + 1;
else
T_F(index(training1(i),2),2) = T_F(index(training1(i),2),2) + 1;
end
end
T_F_value = sprintf('T_F_value_%d.mat',k);
tmp = struct2cell(inputFile);
tmp2 = tmp(1,training1)';
tmp3 = T_F(training1,:);
save(T_F_value,'tmp2','tmp3');
% accuracy 1 %% 第一個accuracy,training set自己做相似度比較
correct = cal_Precision(class, index, training1, training1);
precision(:,k) = correct(1:class_num,1)./member_num2(1:class_num,1);
x1 = mean(precision(:,k));
% accuracy 2 %% 第二個accuracy,移除掉表現不佳data後training set的accuracy
training_testing(intersect(find(T_F(:,2) > 0), find(T_F(:,1) < 1))) = 1;
training2 = find(training_testing == 0);
correct = cal_Precision(class, index, training1, training2);
precision(:,k) = correct(1:class_num,1)./member_num2(1:class_num,1);
x2 = mean(precision(:,k));
% accuracy 3 %%第三個accuracy,拿test set去對traing set做相似度比較
correct = cal_Precision(class, index, testing, training2);
precision(:,k) = correct(1:class_num,1)./member_num2(1:class_num,1);
x3 = mean(precision(:,k));
y = [k,x1,x2,x3]
end
% average precision for each category
mean(precision,2);
% overall average precision
mean(mean(precision))
bar (precision(1:class_num,1), 'DisplayName', 'mean(precision,2)',
'YDataSource', 'mean(precision,2)');
figure(gcf)
end
function training_testing = create_dataset(inputFile,member_num,class_num)
start = 1;
training_testing = zeros(size(inputFile));
for i = 1:class_num
perm = randperm(member_num(i));
training_testing(start-1+perm(1:ceil(member_num(i)/2))) = 1;
%%上面這行不能直接拿掉/2,會出現上述的error
start = start + member_num(i);
end
end
function correct = cal_Precision(class, index, src, dst) %%可能需要修改部分
correct = zeros(class(size(class,1)),1);
for i = 1:size(src)
j = 2;
while (ismember(index(src(i),j),dst) == 0) %% index大小有問題的部分?
j = j + 1;
end
if (class(index(src(i),j)) == class(src(i)))
correct(class(src(i))) = correct(class(src(i)))+1;
end
end
end
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 71.95.57.98
→
06/10 14:54, , 1F
06/10 14:54, 1F