######## Step 0 ######################################################### ## Read in dataset0 - needs to be in data frame format ## The data structure: ## Column 1: SubjectID, ## Column 2: Treatment Indicator [called Trt in the function: Trt=1 in treatment,Trt=0 control] ## Column 3: outcome * ## * If it is a survival outcome, the Column 3 is the time to event [called TIME in the function] ## and column 4 is censoring status 1=event occured, 0=censored [called EVENT in the function] ## The rest of the columns (from Column c1 to Column c2) are binary covariate to define cells ## c1 and c2 are the starting and ending Column numbers of the binary covariates in the dataset ## type is the type of endpoint - type="cts" for continuous endpoint ## - type="bi" for binary endpoint ## - type="sv" for survival endpoint ## p is the probability each cell will be selected as in the paper ## k is the number of subpopulations to be drawn as in the paper ## n2 is the number of simulation ran to estimate the null distributions ## a is the required smallest number of cells included in one sub-population, the default number is 2 ## i.e. to form one sub-population, at least 2 cells are required ## Two options are given in step 3, choose one of them to use, detault is option = "op2" ## option = "op1" is Parallel Computing, cl can be specified to be the number of worker clusters ## created and used, the default cl = 2 ## option = "op2" is simplw for loop ############# Z-statistics Functions ################ # z-statistics function for continuous endpoint fnz.cts=function(ncell,dataset,p,a){ flip0=rbinom(ncell,1,p) while (sum(flip0)0,result1,0),na.rm = T) T_D.2=mean(ifelse(result1<0,result1,0),na.rm = T) ###### Step 3 ####### ###### Estimate the Distribution of S(D*) and T(D*) under the null hypothesis, as shown in the paper ####### # A function which runs z-statistics function using k selected sub-populations # # assign fn.z to be one of the functions: fnz.cts, fnz.bi, fnz.sv fk=function(k){ # Shuffle the treatment assignment in each cell dataset0$Trt is the Trt variable dataset$Trt=unlist(tapply(dataset0$Trt,cellname,sample)) res=replicate(k,fn.z(ncell,dataset,p,a)) c(max(res,na.rm = T),min(res,na.rm = T), mean(ifelse(res>0,res,0),na.rm = T), mean(ifelse(res<0,res,0),na.rm = T)) } ## Under null hypothesis, the distribution of S(D*) and T(D*) ## if(option=="op1"){ #Option 1: Parallel Computing #cl=40 # the number of worker clusters will be created and used, default is 2 registerDoParallel(cl) Tstat=foreach(i=1:n2,.combine='rbind', .packages='survival') %dorng% fk(k) } else if (option=="op2"){ #Option 2: for loop Tstat=NULL for (i in 1:n2){ Tstat0=fk(k) Tstat=rbind(Tstat,Tstat0) } } else { print("error") } ###### Step 4 ####### ###### Calculate under the null distribution ########## ###### the probability of any value exceeding the observed S(D) and T(D) ####### ## Extreme Value Test I.S.1=round(length(which(Tstat[,1]>S_D.1))/n2,digits=4) I.T.1=round(length(which(Tstat[,2]S_D.2))/n2,digits=4) I.T.2=round(length(which(Tstat[,4]