# --- CTGDR-Cox model estimation --- # --- Aug. 17th, 2005 --- tau1<-1.0; tau2<-1.0; step<-681; increase<-0.005; # -------------------------------- z.dat<-t(read.table("norm-express.dat")); outcome.dat<-read.table("survival.dat", header=T); cluster<-read.table("cluster-info.txt", header=T); samplesize<-nrow(z.dat); num.cov<-ncol(z.dat); z<-matrix(rep(0, samplesize*num.cov), nrow=samplesize); for(i in 1:samplesize){ for(j in 1:num.cov) z[i, j]<-z.dat[i, j]; } observe<-outcome.dat$FollowUp; indicator<-outcome.dat$Status; # --- this part copied from the estimation for Cox-TGDR --- sort.time<-rep(0, samplesize); sort.indicator<-rep(0, samplesize); sort.z<-matrix(rep(0, samplesize*num.cov), nrow=samplesize); sort.index<-sort(observe, index.return=T)$ix; for(i in 1:samplesize){ sort.time[i]<-observe[sort.index[i]]; sort.indicator[i]<-indicator[sort.index[i]]; for(j in 1:num.cov){ sort.z[i, j]<-z[sort.index[i], j]; } } # --- now estimation, a combination of CTGDR cross and TGDR estimate --- beta.est<-rep(0, num.cov); h.v<-rep(0, num.cov); f.v<-rep(0, num.cov); f.v2<-rep(0, num.cov); g.v<-rep(0, num.cov); g.v2<-rep(0, num.cov); cri.2<-rep(0, num.cov); for(s in 1:step){ # --- first compute beta*com.z --- beta.z<-sort.z%*%beta.est; exp.beta.z<-rep(0, nrow(beta.z)); for(i in 1:length(exp.beta.z)) exp.beta.z[i]<-exp(beta.z[i,1]); # --- compute gradient --- gradient<-rep(0, num.cov); for(i in 1:samplesize){ if(sort.indicator[i]==1){ # --- for event only term1<-sort.z[i, ]; upper<-rep(0, length(term1)); for(j in i:samplesize){ upper<-upper+sort.z[j, ]*exp.beta.z[j]; } term2<-upper/sum(exp.beta.z[i:samplesize]); gradient<-gradient+term1-term2; } } max.gradient<-max(abs(gradient)); # --- the following copied from cross validation --- # ---- now the new and tricky part -------- work.gradient.dat<- data.frame(cbind(cluster$cluster, abs(gradient))); cri.cluster<-rep(0, 20); max.grad.cluster<-rep(0, 20); for(clus in 1:20){ work.grad.clus<- work.gradient.dat[work.gradient.dat[ ,1]==clus,]; cri.cluster[clus]<-sum(work.grad.clus[ ,2]); max.grad.cluster[clus]<-max(abs(work.grad.clus[ ,2])); } pseudo.gv<-rep(0, num.cov); for(clus in 1:num.cov){ pseudo.gv[clus]<-cri.cluster[cluster$cluster[clus]]; cri.2[clus]<-max.grad.cluster[cluster$cluster[clus]]*tau2; } for(i in 1:length(g.v)) g.v[i]<-gradient[i]; for(i in 1:length(f.v2)){ if( abs(g.v[i]) >= cri.2[i] ) f.v2[i]<-1; if( abs(g.v[i]) < cri.2[i] ) f.v2[i]<-0; } cri.t<- tau1*max(pseudo.gv); for(i in 1:length(pseudo.gv)){ if(pseudo.gv[i]>=cri.t){ f.v[i]<-1; } if(pseudo.gv[i]< cri.t){ f.v[i]<-0; } } for(i in 1:length(g.v)) h.v[i]<-f.v[i]*f.v2[i]*g.v[i]; beta.est<-beta.est+increase*h.v; } # --- model checking --- library(survival); score<-z%*%beta.est; group<-ifelse(score>=median(score), 1, 0); train.chisq<-survdiff(Surv(observe, indicator)~group, rho=0); write.table(cbind(s, train.chisq$chisq), file="test.txt", quote=F, append=F, row.name=F, col.name=T); write.table(beta.est, file="est2.txt", quote=F, append=F, row.name=F, col.name=F); write.table(score, file="score.txt", quote=F, append=F, row.name=F, col.name=F);