###  April 12th, 2006
#    estimation: logistic model; cluster TGDR
#    mainly copied from CTGDR/Cox and HIV (logistic TGDR)


all.y<-read.table("ind-org.dat")$V1;
all.x<-read.table("sel-norm-exp.dat");
cluster<-read.table("cluster.dat");

samplesize<-length(all.x);

aug.x<-cbind(rep(1, samplesize), t(all.x));

aug.cov<-ncol(aug.x);

###
tau1<-1.0; tau2<-1.0;
step<-401; increase<-0.01;
###

x<-matrix(0, nrow=samplesize, ncol=aug.cov);

for(i in 1:nrow(x)){
   for(j in 1:ncol(x)){
      x[i,j]<-aug.x[i, j];
   }
}

# --- now prepare to estimate ---
beta.est<-rep(0, aug.cov);
h.v<-rep(0, aug.cov);
      
f.v<-rep(0, aug.cov);   f.v2<-rep(0, aug.cov);
g.v<-rep(0, aug.cov);   g.v2<-rep(0, aug.cov);

cri.2<-rep(0, aug.cov);

for(s in 1:step){

   # --- compute gradient ---
   beta.z<-x%*%beta.est;
   beta.p<-1/(1+exp(-beta.z));
   term.1<-all.y/beta.p-(1-all.y)/(1-beta.p);
   term.2<-exp(beta.z)/((1+exp(beta.z))^2);
   gradient<-(t(term.1*term.2))%*%x;


   max.gradient<-max(abs(gradient));

#   write.table(cbind(s, max.gradient), file="grad.txt", quote=F,
#	row.name=F, col.name=F, append=T);
   
   grad<-gradient/max.gradient;
   gradient<-grad;


      ###### copied from cross.R ###########
      # --- we work with the covariates only
      # --- when computing the threshold, the intercept is not considered
      work.gradient.dat<-
          data.frame(cbind(cluster[ ,2],
                abs(gradient[2:length(gradient)])));
      num.cluster<-length(unique(cluster[ ,2]));
      cri.cluster<-rep(0, num.cluster);
      max.grad.cluster<-rep(0, num.cluster);

      # --- get the cluster gradient; and the max within each cluster
      for(clus in 1:num.cluster){
          work.grad.clus<-   
              work.gradient.dat[work.gradient.dat[ ,1]==clus,];
          cri.cluster[clus]<-sum(work.grad.clus[ ,2]);
          max.grad.cluster[clus]<-max(abs(work.grad.clus[ ,2]));
      }
      
      # --- for each gene, get the cluster criteria (pseudi.gv)
      #     and within cluster criteria (cri.2)
      #     Note: length differs by 1.
      pseudo.gv<-rep(0, aug.cov);
      for(i in 2:aug.cov){
          pseudo.gv[i]<-cri.cluster[cluster$V2[i-1]];
          cri.2[i]<-max.grad.cluster[cluster$V2[i-1]]*tau2;
      }
          
      for(i in 1:length(g.v))
          g.v[i]<-gradient[i];
      
      # --- get within cluster threshold indicator
      f.v2<-ifelse(abs(g.v)>=cri.2, 1, 0);
      f.v2[1]<-1;   #-- for the intercept
      
      # --- get the cluster threshold indicator for each covariate
      cri.t<- tau1*max(pseudo.gv);
      f.v<-ifelse(pseudo.gv>=cri.t, 1, 0);
      f.v[1]<-1;    #-- for the intercept
      
      h.v<-f.v*f.v2*g.v;     
      beta.est<-beta.est+increase*h.v;


}


beta2<-round(beta.est[2:length(beta.est)], digits=3);

count<-length(beta2[beta2!=0]);   
write.table(beta2, file="beta-est.txt", quote=F, append=F,
           row.name=F, col.name=F);

final<-cbind(beta2, cluster[ ,2]);
nonzero<-final[final[ ,1]!=0, ];
length(unique(nonzero[ ,2]));

#--- compute class error ---

beta.z<-x%*%beta.est;
beta.p<-1/(1+exp(-beta.z));

pred<-ifelse(beta.p>=sort(beta.p, decreasing=T)[sum(all.y)], 1, 0);