### April 12th, 2006 # estimation: logistic model; cluster TGDR # mainly copied from CTGDR/Cox and HIV (logistic TGDR) all.y<-read.table("ind-org.dat")$V1; all.x<-read.table("sel-norm-exp.dat"); cluster<-read.table("cluster.dat"); samplesize<-length(all.x); aug.x<-cbind(rep(1, samplesize), t(all.x)); aug.cov<-ncol(aug.x); ### tau1<-1.0; tau2<-1.0; step<-401; increase<-0.01; ### x<-matrix(0, nrow=samplesize, ncol=aug.cov); for(i in 1:nrow(x)){ for(j in 1:ncol(x)){ x[i,j]<-aug.x[i, j]; } } # --- now prepare to estimate --- beta.est<-rep(0, aug.cov); h.v<-rep(0, aug.cov); f.v<-rep(0, aug.cov); f.v2<-rep(0, aug.cov); g.v<-rep(0, aug.cov); g.v2<-rep(0, aug.cov); cri.2<-rep(0, aug.cov); for(s in 1:step){ # --- compute gradient --- beta.z<-x%*%beta.est; beta.p<-1/(1+exp(-beta.z)); term.1<-all.y/beta.p-(1-all.y)/(1-beta.p); term.2<-exp(beta.z)/((1+exp(beta.z))^2); gradient<-(t(term.1*term.2))%*%x; max.gradient<-max(abs(gradient)); # write.table(cbind(s, max.gradient), file="grad.txt", quote=F, # row.name=F, col.name=F, append=T); grad<-gradient/max.gradient; gradient<-grad; ###### copied from cross.R ########### # --- we work with the covariates only # --- when computing the threshold, the intercept is not considered work.gradient.dat<- data.frame(cbind(cluster[ ,2], abs(gradient[2:length(gradient)]))); num.cluster<-length(unique(cluster[ ,2])); cri.cluster<-rep(0, num.cluster); max.grad.cluster<-rep(0, num.cluster); # --- get the cluster gradient; and the max within each cluster for(clus in 1:num.cluster){ work.grad.clus<- work.gradient.dat[work.gradient.dat[ ,1]==clus,]; cri.cluster[clus]<-sum(work.grad.clus[ ,2]); max.grad.cluster[clus]<-max(abs(work.grad.clus[ ,2])); } # --- for each gene, get the cluster criteria (pseudi.gv) # and within cluster criteria (cri.2) # Note: length differs by 1. pseudo.gv<-rep(0, aug.cov); for(i in 2:aug.cov){ pseudo.gv[i]<-cri.cluster[cluster$V2[i-1]]; cri.2[i]<-max.grad.cluster[cluster$V2[i-1]]*tau2; } for(i in 1:length(g.v)) g.v[i]<-gradient[i]; # --- get within cluster threshold indicator f.v2<-ifelse(abs(g.v)>=cri.2, 1, 0); f.v2[1]<-1; #-- for the intercept # --- get the cluster threshold indicator for each covariate cri.t<- tau1*max(pseudo.gv); f.v<-ifelse(pseudo.gv>=cri.t, 1, 0); f.v[1]<-1; #-- for the intercept h.v<-f.v*f.v2*g.v; beta.est<-beta.est+increase*h.v; } beta2<-round(beta.est[2:length(beta.est)], digits=3); count<-length(beta2[beta2!=0]); write.table(beta2, file="beta-est.txt", quote=F, append=F, row.name=F, col.name=F); final<-cbind(beta2, cluster[ ,2]); nonzero<-final[final[ ,1]!=0, ]; length(unique(nonzero[ ,2])); #--- compute class error --- beta.z<-x%*%beta.est; beta.p<-1/(1+exp(-beta.z)); pred<-ifelse(beta.p>=sort(beta.p, decreasing=T)[sum(all.y)], 1, 0);