#Please note that data folder should contain only data files
#Datasets should be in tab separated matrix format, each row containing expression of that and each column indicating that sample 
#There should be the only two unique column names in a dataset (each referring to one condition e.g. cancer, healthy) and these should be the same across all datasets 
#The first column should contain gene names
#Enter the two locations below:

data.loc = "your data folder path along with the data folder name"
res.loc = "your result folder path along with the result folder name"

#You do not need to enter anything after this line of code, run the rest of the code as it is!

extr.nth.elm=function(x, n)
return(unlist(x)[n])

data.files = dir(data.loc)

genes = NULL
conditions = NULL
for(k in 1:length(data.files))
{
	data = read.table(paste(data.loc, data.files[k], sep="/"), sep="\t", header=T, row.names = 1)
	data = as.matrix(data)
	genes = unique(c(genes, rownames(data)))
	conditions  = c(conditions, unique(colnames(data)))
}

conditions = sapply(strsplit(conditions, "[.]"), extr.nth.elm, 1)
conditions = unique(conditions)
if(length(conditions)!=2)
stop("one or more dataset(s) contain do not contain exactly two conditions")
genes = sort(unique(genes))

m2 = matrix(rep(genes, length(genes)), length(genes), length(genes))
m1 = t(m2)
ltm = lower.tri(m1)
n1 = m1[ltm]
n2 = m2[ltm]
pair.names = paste(n1, n2, sep="_")
rm(m1, m2, n1, n2)

write.table(genes, paste(res.loc, "gene.list.txt", sep="/"), row.names=F, sep="\t")
write.table(pair.names, paste(res.loc, "gene.pairs.list.txt", sep="/"), row.names=F, sep="\t")

cnt = matrix(rep(0, length(genes)*length(genes)), length(genes), length(genes) )
for(k in 1:length(data.files))
{
	data = read.table(paste(data.loc, data.files[k], sep="/"), sep="\t", header=T)
	data=as.matrix(data)
	data.genes = data[,1]
	data = data[,-1]
	hdrs = sapply(strsplit(colnames(data), "[.]"), extr.nth.elm, 1)
	colnames(data) = hdrs

	new.data = matrix(NA, nrow = length(genes), ncol = length(data[1,]))
	colnames(new.data) = colnames(data)
	index = match(genes, data.genes)
	new.data = data[index, ]
	rownames(new.data) = genes

	con1.data = new.data[, which(colnames(new.data) == conditions[1])]
	con2.data = new.data[, which(colnames(new.data) == conditions[2])]

	con1.data = t(apply(con1.data, 2, as.numeric))
	con2.data = t(apply(con2.data, 2, as.numeric))

	con1.mat = rcorr(con1.data)
	con2.mat = rcorr(con2.data)

	rm(data, new.data, con1.data, con2.data)

	con1.cor = con1.mat[[1]]
	con1.n = con1.mat[[2]]
	rm(con1.mat)
	
	ind.na.con1 = which(con1.n<4)

	con2.cor = con2.mat[[1]]
	con2.n = con2.mat[[2]]
	rm(con2.mat)

	ind.na.con2 = which(con2.n<4)
	
	con1.z = atanh(con1.cor)
	con2.z = atanh(con2.cor)
	diff.z = con1.cor - con2.cor
	var.z = (1/(con1.n - 3)) + (1/(con2.n - 3))
	cnt = cnt + 1
	
	ind.na = unique(c(which(is.na(diff.z)), ind.na.con1, ind.na.con2))
	if(length(ind.na) > 1){
	diff.z[ind.na] = 0
	var.z[ind.na] = 0
	cnt[ind.na] = cnt[ind.na] - 1 }
	
	if(k == 1){
	s.diff.z = diff.z
	s.var.z = var.z}
	
	rm(diff.z, var.z)
	
	if(k>1)
	s.diff.z = s.diff.z + diff.z
	s.var.z = s.var.z + var.z
	
}
q.diff.z = s.diff.z / sqrt(s.var.z)
rm(s.diff.z, s.var.z)
p.diff.z = 2*pnorm(abs(q.diff.z), lower.tail = F)

colnames(q.diff.z) = genes
rownames(q.diff.z) = genes
colnames(p.diff.z) = genes
rownames(p.diff.z) = genes
colnames(cnt) = genes
rownames(cnt) = genes

write.table(q.diff.z, paste(res.loc, "q_scores.txt", sep = "/"), sep = "\t")
write.table(p.diff.z, paste(res.loc, "p_values.txt", sep = "/"), sep = "\t")
write.table(cnt, paste(res.loc, "counts.txt", sep = "/"), sep = "\t")