################################################################### # --- Regularized ROC with the TGDR for variable selection # --- The function for cross validation # --- with fixed tau; cross validate over number of iterations # --- the first covariate is the anchor # Written by Shuangge Ma (joint with Jian Huang), Dec. 2005 ################################################################### ind<-read.table("indicator.txt")$V1; z.dat<-read.table("covariate.txt"); sigma.n<-0.50; # --- sigma.n selected using another program anchor<-1; V<- 5; step<- 1000; increase<-0.01; tau<-1.0; samplesize<-nrow(z.dat); num.cov<-ncol(z.dat); z<-matrix(0, samplesize, num.cov); for(i in 1:num.cov) z[ ,i]<-z.dat[[i]]; z<-z/sigma.n; aug.cross<-400; aug.eva<-25; delete.size<-as.integer(samplesize/V); criteria<- rep(0, step); cross.position<-rep(0, samplesize); for(i in 1:V) for(j in 1:delete.size) cross.position[(i-1)*delete.size+j]<-i; # randomly permute the data, so that there will be no "hiden pattern" cross.position<-cross.position[order(runif(samplesize, 0, 1))]; for(v in 1:V){ # --- cut data --- cross.ind<-ind[cross.position!=v]; cross.z<-z[cross.position!=v, ]; case.z<-cross.z[cross.ind==1, ]; control.z<-cross.z[cross.ind==0, ]; evaluate.ind<-ind[cross.position==v]; evaluate.z<-z[cross.position==v, ]; e.case.z<-evaluate.z[evaluate.ind==1, ]; e.control.z<-evaluate.z[evaluate.ind==0, ]; # --- data augmentation --- # --- to make different cross validation sets comparable --- aug.cross.data<-matrix(0, aug.cross, num.cov); for(i in 1:aug.cross){ p1<-as.integer(runif(1, min=1, max=(nrow(case.z)+1))); p2<-as.integer(runif(1, min=1, max=(nrow(control.z)+1))); for(j in 1:num.cov){ aug.cross.data[i, j]<- case.z[p1, j]-control.z[p2, j]; } } aug.evaluate.data<-matrix(0, aug.eva, num.cov); for(i in 1:aug.eva){ p1<-as.integer(runif(1, min=1, max=(nrow(e.case.z)+1))); p2<-as.integer(runif(1, min=1, max=(nrow(e.control.z)+1))); for(j in 1:num.cov){ aug.evaluate.data[i, j]<-e.case.z[p1, j]-e.control.z[p2, j]; } } # --- now estimate --- beta.est<-rep(0, num.cov); beta.est[anchor]<- 1; h.v<-rep(0, num.cov); f.v<-rep(0, num.cov); g.v<-rep(0, num.cov); for(s in 1:step){ gradient<- rep(0, num.cov); for(i in 1:nrow(aug.cross.data)){ diff.z<-aug.cross.data[i, ]; s.func<-1/(1+exp(-sum(diff.z*beta.est))); gradient<-gradient+s.func*(1-s.func)*diff.z; } gradient[anchor]<- 0; # -- standardized gradient --- if(max(abs(gradient)) >0.000001){ for(i in 1:length(g.v)) g.v[i]<-gradient[i]/max(abs(gradient)); } if(max(abs(gradient)) <= 0.000001){ for(i in 1:length(g.v)) g.v[i]<-0.0; } f.v<-ifelse(abs(g.v)>=tau, 1, 0); for(i in 1:length(g.v)) h.v[i]<-f.v[i]*g.v[i]; beta.est<-beta.est+increase*h.v; # --- now evaluate --- cri.temp<-0; for(p in 1:nrow(aug.evaluate.data)){ e.t.betaz<-sum(aug.evaluate.data[p, ]*beta.est); cri.temp<-cri.temp+1/(1+exp(-e.t.betaz)); } if(is.na(cri.temp)) cri.temp<-0; criteria[s]<-criteria[s]+cri.temp; } # --- end of step --- } write.table(cbind(seq(from=1, to=step, by=1), criteria), file="cv-score.txt", append=F, quote=F, row.name=F, col.name=F); ss<-seq(from=1, to=step, by=1); step.est<-ss[criteria==max(criteria)]; write.table(step.est, file="cross-step.txt", append=T, quote=F, row.name=F, col.name=F);