Feb 5 2009 Austin Hilliard. Introduction to resampling in MATLAB. So you've done an experiment. Two independent datasets: control experimental Have n numbers in each dataset, representing independent measurements Question:
Download Policy: Content on the Website is provided to you AS IS for your information and personal use and may not be sold / licensed / shared on other websites without getting consent from its author.While downloading, if for some reason you are not able to download a presentation, the publisher may have deleted the file from their server.
Feb 5 2009
Austin Hilliard
Did the experimental manipulation have an effect, i.e. did it make a difference?
→ datadescriptors
→ test statistic
→ distribution of test statistics generated from datasets that ARE from the same underlying distribution
ctrl = 10.*(5.75+randn(25,1));
exp = 10.*(5+randn(25,1));
boxplot([ctrl exp])
groups = {'control''experimental'}
boxplot([ctrl exp],'labels',groups)
hist(ctrl) hist(exp)
title('Control group') title('Experimental group')
ctrl_d = mean(ctrl);
exp_d = mean(exp);
test_stat = abs(ctrl_d - exp_d);
→ distribution of test stat under null hypothesis
box = [ctrl; exp]; % combine datasets into 'box'
% could use concat()
pseudo_stat_dist = zeros(1,10000); % create vector 'pseudo_stat_dist'
% to store results of resampling
% --- start resampling ---
for trials = 1:10000 % resample 10000 times
pseudo_ctrl = ... % create pseudo groups to be
sample(length(ctrl), box); % same size as actual groups
pseudo_exp = ...
sample(length(exp), box);
pseudo_ctrl_d = ... % compute pseudo group
mean(pseudo_ctrl); % descriptors
pseudo_exp_d = ...
mean(pseudo_exp);
pseudo_stat = ... % compute pseudo test stat
abs(pseudo_ctrl_d - pseudo_exp_d);
pseudo_stat_dist(trials) = ... % store pseudo test stat to
pseudo_stat; % build null distribution
end
→ p-value
bigger = count(pseudo_stat_dist > test_stat); % count pseudo test stats
% bigger than actual stat
pval = bigger / length(pseudo_stat_dist) % divide by total number
% of pseudo test stats to
% compute p-value
% could use proportion()
hist(pseudo_stat_dist) % show histogram of pseudo
title('Pseudo test stat distribution') % test stats
xlabel('Pseudo test stat')
ylabel('Frequency')
Restated →
→ Test statistic is computed from these differences, not the raw datasets
diffs = ctrl - exp; % compute change from ctrl condition
% to exp condition
test_stat = abs(mean(diffs)); % compute test statistic
pseudo_stat_dist = zeros(1,10000); % create vector to store pseudo test
% stats generated from resampling
for trials = 1:10000 % resample 10,000 times
signs = sample(length(diffs), [-1 1])'; % randomly create vector of 1's
% and -1's same size as diffs
pseudo_diffs = signs .* diffs; % multiply by actual diffs to
% randomize direction of diff
pseudo_stat = abs(mean(pseudo_diffs)); % compute pseudo test stat from
% pseudo diffs
pseudo_stat_dist(trials) = pseudo_stat; % store pseudo test stat to
% grow null distribution
end
bigger = count(pseudo_stat_dist > test_stat); % count pseudo test stats
% bigger than actual stat
pval = bigger / length(pseudo_stat_dist) % divide by total number
% of pseudo test stats to
% compute p-value
% could use proportion()
hist(pseudo_stat_dist) % show histogram of pseudo
title('Pseudo test stat distribution') % test stats
xlabel('Pseudo test stat')
ylabel('Frequency')
function [] = bootstrap2groupsMEAN_PAIRED (ctrl, exp)
% --- prepare for resampling ---
diffs = ctrl - exp; % compute change from ctrl condition
% to exp condition
test_stat = abs(mean(diffs)) % compute test statistic
pseudo_stat_dist = zeros(1,10000); % create vector to store pseudo test
% stats generated from resampling
% --- resampling ---
for trials = 1:10000 % resample 10,000 times
signs = sample(length(diffs), [-1 1])'; % randomly create vector of 1's
% and -1's same size as diffs
pseudo_diffs = signs .* diffs; % multiply by actual diffs to
% randomize direction of diff
pseudo_stat = abs(mean(pseudo_diffs)); % compute pseudo test stat from
% pseudo diffs
pseudo_stat_dist(trials) = pseudo_stat; % store pseudo test stat to
% grow null distribution
end
% --- end resampling ---
bigger = count(pseudo_stat_dist > test_stat); % count pseudo test stats
% bigger than actual stat
pval = bigger / length(pseudo_stat_dist) % divide by total number
% of pseudo test stats to
% compute p-value
% could use proportion()
hist(pseudo_stat_dist) % show histogram of pseudo
title('Pseudo test stat distribution') % test stats
xlabel('Pseudo test stat')
ylabel('Frequency')
function [] = bootstrap2groupsMEANS (ctrl, exp)
%
% function [] = bootstrap2groupsMEANS (ctrl, exp)
%
% 'ctrl' and 'exp' are assumed to be column vectors, this is
% necessary for creation of 'box'
%
% --- prepare for resampling ---
ctrl_d = mean(ctrl); % compute descriptors
exp_d = mean(exp); % 'ctrl_d' and 'exp_d'
test_stat = abs(ctrl_d - exp_d) % compute test statistic 'test_stat'
box = [ctrl; exp]; % combine datasets into 'box'
% could use concat()
pseudo_stat_dist = zeros(1,10000); % create vector 'pseudo_stat_dist'
% to store results of resampling
% --- start resampling ---
for trials = 1:10000 % resample 10,000 times
pseudo_ctrl = ... % create pseudo groups to be
sample(length(ctrl), box); % same size as actual groups
pseudo_exp = ...
sample(length(exp), box);
pseudo_ctrl_d = ... % compute pseudo group
mean(pseudo_ctrl); % descriptors
pseudo_exp_d = ...
mean(pseudo_exp);
pseudo_stat = ... % compute pseudo test stat
abs(pseudo_ctrl_d - pseudo_exp_d);
pseudo_stat_dist(trials) = ... % store pseudo test stat to
pseudo_stat; % build null distribution
end
% --- end resampling ---
bigger = count(pseudo_stat_dist > test_stat); % count pseudo test stats
% bigger than actual stat
pval = bigger / length(pseudo_stat_dist) % divide by total number
% of pseudo test stats to
% compute p-value
% could use proportion()
hist(pseudo_stat_dist) % show histogram of pseudo
title('Pseudo test stat distribution') % test stats
xlabel('Pseudo test stat')
ylabel('Frequency')