Permutation distribution of Friedman Test data

Because of the need to correct for multiple comparisons, I explored building a permutation distribution for the data analyzed using the Friedman test. This paper describes a procedure for doing so; however, it’s intricacies are pretty deep and tangled (for me, at least). The procedures it describes though involve switching condition labels, so that principle is something applied in the approach I took for my data. Basically, for half of the participants (selected via random sampling ) the condition labels are switched in random order. To maintain the proper shuffling of labels (for condition labels applied to specific subjects) a configuration file was first generated using this R code:

cc <- as.vector(c(rep(0,12),rep(1,12)))
con <- matrix(nrow=4000,ncol=27)
mat_row <- 0;

for (i in 1:nrow(con)){
    mat_row <- mat_row + 1
    con[mat_row,] <- c(sample(cc),sample(1:3))
}
write.table(con,file="perm_config_file.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)

This creates a 4000 X 27 configuration matrix, where each row is twenty-four long combination of 1’s and 0’s (12 and 12 for distinguishing which 12 of the 24 total subjects that will have labels switched) and 1, 2, 3 sampled in random order (for the ordering of the three conditions). One line then configures one permutation. There are 4000 rows for 4000 permutations of the whole-brain data to be run.

This is the R code for running these permutations via a Swift workflow:

#---- doing friedman permutations
#---- coded Wednesday; July 2, 2008

#---- this is the function used in the aggregate:
FriedmanPerm <- function(x){
    mm <- matrix(nrow=24,ncol=3)
    mm[,1] = x[1:24]
    mm[,2] = x[25:48]
    mm[,3] = x[49:72]
    a <- mm[subj_resamp,cond_resamp]
    b <- mm[subj_same,]
    mm_shuffle <- rbind(a,b)
    return(friedman.test(mm_shuffle)[[1]][[1]])
}

#---- Swift housekeeping:
allinputs <- Sys.getenv("R_SWIFT_ARGS");
print(Sys.getenv("R_SWIFT_ARGS"));
outname <- noquote(strsplit(allinputs," ")[[1]][1])
configLine <- as.integer(noquote(strsplit(allinputs," ")[[1]][2]))
print(configLine)

inputfile <- Sys.getenv("R_INPUT");
print(inputfile)
Query_out <- as.matrix(read.table(inputfile))
config <- as.matrix(read.table("perm_config_file.txt"))
cond_resamp <- as.vector(config[configLine,25:27])
subj_resamp <- which(as.vector(config[configLine,1:24])==1)
subj_same <- which(as.vector(config[configLine,1:24])==0)

#---- this is where the fun begins:
data_stack <- stack(data.frame(Query_out[,3:5]))[1]
vertices <- Query_out[,2]
nn <- data.frame(cbind(vertices,data_stack))
m <- matrix(nrow=length(as.integer(levels(as.factor(vertices)))),ncol=2)
m[,1] <- as.integer(levels(as.factor(vertices)))
m[,2] <- aggregate(nn$values, list(nn$vertices),FriedmanPerm)[,2]
write.table(round(m,5), file=paste(configLine,"PermFriedman",outname,".txt",sep=""), row.names=FALSE, col.names=FALSE, quote=F)

The Swift code sends this R script as well as the configuration matrix file to the grid site, where it does the query and analysis. Besides sending the configuration file, it performs the same db query and is otherwise much the same code in the previous post for executing the Friedman test.

type file{}

(file qout, file rout) run_query (string allcatargs, file config, file r_script, file r_config_table){
    app{
        Mediator allcatargs stdout=@filename(qout) @filename(r_script) @filename(r_config_table);
    }
}

string user = @arg("user");
string db = "yourdb";
string host = "yourhost";

file r_script<single_file_mapper; file="permFriedman.R">;
file r_config_table<single_file_mapper; file="perm_config_file.txt">;
file config<single_file_mapper; file="user.config">;

loop_query(int bvox, string user, string db, string host, string query_outline, file r_script, file config, file r_config_table, string id){
    int evox = bvox+13999;
    string baseid = "PermFriedman";
    string r_swift_args = @strcat(id);
    string theoutprefix = @strcat(id,baseid,bvox,"_",evox);
    string med_args = @strcat("--user ","andric",
        " --conf ", "user.config",
        " --db ", db,
        " --host ", host,
        " --query ", query_outline,
        " --r_script ", @filename(r_script),
        " --begin_vox ", bvox,
        " --end_vox ", evox,
        " --outprefix ", theoutprefix,
        " --batchstep ", "14000",
        " --r_swift_args ", r_swift_args,
        " --subject ", id);
    file q_result <single_file_mapper; file=@strcat("query_results/",theoutprefix,".qresult")>;
    file r_result <single_file_mapper; file=@strcat(theoutprefix,".txt")>;
    (q_result, r_result) = run_query(med_args, r_script, config, r_config_table);
}

int permbrains = [1:1000:1];
foreach perm in permbrains {
    int mybatches = [1:196000:14000];
    foreach batch in mybatches {
        string id = @strcat(perm);
        string query_outline = @strcat("select subject, vertex, speech_lag, emblem_lag, embspeech_lag from ccf_phase2_lh where vertex between BEGIN_BATCH and END_BATCH");
        loop_query(batch, user, db, host, query_outline, r_script, config, r_config_table, id);
    }
}

Analysis using the Friedman Test

The data are within-subjects and ordinal (possible values are integers -2 to 2) at every vertex (it’s surface data in the group space).
Here is the R code:

#---- R script for carrying out friedman test
#---- this is a test script to use functions in the aggregate
FriedmanStat <- function(x){
    mm <- matrix(nrow=24,ncol=3)
    mm[,1] = x[1:24]
    mm[,2] = x[25:48]
    mm[,3] = x[49:72]
    return(friedman.test(mm)[[1]][[1]])
}

FriedmanP <- function(x){
    mm <- matrix(nrow=24,ncol=3)
    mm[,1] = x[1:24]
    mm[,2] = x[25:48]
    mm[,3] = x[49:72]
    return(friedman.test(mm)[[3]][[1]])
}

Query_out <- as.matrix(read.table("test_aov.lh.txt"))
data_stack <- stack(data.frame(Query_out[,3:5]))[1]
vertices <- Query_out[,2]
nn <- data.frame(cbind(vertices,data_stack))
m <- matrix(nrow=length(as.integer(levels(as.factor(vertices)))),ncol=3)
m[,1] <- as.integer(levels(as.factor(vertices)))
m[,2] <- aggregate(nn$values, list(nn$vertices),FriedmanStat)[,2]
m[,3] <- 1 - (aggregate(nn$values, list(nn$vertices),FriedmanP)[,2])
write.table(round(m,5), file=paste("test.txt",sep=""), row.names=FALSE, col.names=FALSE, quote=F)

The query that will be used:

select subject, vertex, speech_lag, emblem_lag, embspeech_lag from ccf_phase2_rh where seed_region = 'region' and vertex between BEGIN_BATCH and END_BATCH;

where BEGIN_BATCH and END_BATCH are unique batch sets established in the Swift code.
The Swift code uses our “Mediator” app that instantiates database queries on the grid sites:

type file{}

(file qout, file rout) run_query (string allcatargs, file config, file r_script){
    app{
        Mediator allcatargs stdout=@filename(qout) @filename(r_script);
    }
}

string user = @arg("user");
string db = "yourdb";
string host = "yourhost";
string baseid = "rh_NonParam";

file r_script<single_file_mapper; file="scripts/FriedmanTest.R">;
file config<single_file_mapper; file="user.config">;

loop_query(int bvox, string user, string db, string host, string query_outline, file r_script, file config, string id){
    int evox = bvox+499;
    string r_swift_args = @strcat(id);
    string theoutprefix = @strcat(id,bvox,"_",evox);
    string med_args = @strcat("--user ","andric",
        " --conf ", "user.config",
        " --db ", db,
        " --host ", host,
        " --query ", query_outline,
        " --r_script ", @filename(r_script),
        " --begin_vox ", bvox,
        " --end_vox ", evox,
        " --outprefix ", theoutprefix,
        " --batchstep ", "500",
        " --r_swift_args ", r_swift_args,
        " --subject ", id);
    file q_result <single_file_mapper; file=@strcat("results/",theoutprefix,".qresult")>;
    file r_result <single_file_mapper; file=@strcat(theoutprefix,".txt")>;
    (q_result, r_result) = run_query(med_args, r_script, config);
}

string regions = ["IDEAL"];
foreach region in regions {
    int mybatches = [1:196000:500];
    foreach batch in mybatches {
        string id = @strcat(region, baseid);
        string query_outline = @strcat("select subject, vertex, speech_lag, emblem_lag, embspeech_lag from ccf_phase2_rh where seed_region = '",region,"' and vertex between BEGIN_BATCH and END_BATCH");
        loop_query(batch, user, db, host, query_outline, r_script, config, id);
    }
}

It would also be useful to embed the hemispheres in a for loop to distribute via Swift. As this is, it’s my “rh” version – and I also have a “lh” version that queries the left hemisphere data.