Home > Basic Numerical Examples
Just the Basics: Using mph.fit to fit standard contingency table models
Author: Joseph B. Lang, Department of Statistics and Actuarial Science, University of Iowa, Iowa City, IA USA 52245 (Last Updated: 5/22/09)
The numerical examples below were fitted using mph.fit, VERSION 3.1. This document only includes input, not output.
For another collection of less standard examples, click here.
Primary References:
EXAMPLE 1. Test whether a binomial probability equals 0.5.Lang, J.B (2004). "Multinomial-Poisson Homogeneous Models for Contingency Tables," Annals of Statistics, 32, 340-383 .
Lang, J.B. (2005). "Homogeneous Linear Predictor Models for Contingency Tables," JASA, 100, 121-134
See also, other on-line documentation starting at www.stat.uiowa.edu/~jblang/mph.fitting/
############################################################################################# # # EXAMPLE 1. Test whether a binomial probability equals 0.5 # # y=(15, 22) <- Y ~ MP(nu,p|strata=1,fixed="all"); i.e. Y ~ multinomial # # In other symbols, # # y=(15, 22) <- Y=(Y[1], Y[2]) ~ multinomial(37, p=(p[1],p[2])). # # GOAL: Test H0: p[1] = 0.5 vs. H1: not H0. a1 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-0.5}) #Alternative specifications... a2 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-p[2]}) a3 <- mph.fit(y=c(15, 22), constraint=function(p){log(p[1]/p[2])}) a4 <- mph.fit(y=c(15, 22), constraint=function(m){m[1]-m[2]},h.mean=T) a5 <- mph.fit(y=c(15, 22), link=function(p){p}, X=matrix(1,2,1)) a6 <- mph.fit(y=c(15, 22), link="logm",X=matrix(1,2,1)) # # Alternatively, assume that # # y=(15, 22) <- Y ~ MP(nu, p|strata=1,fixed="none"); i.e. Y ~ indep Poisson. # # In other symbols, # y=(15, 22) <- Y = (Y[1],Y[2]), where Y[i] indep ~ Poisson(nu p[i]), i=1,2. # # GOAL: Test H0: p[1] = 0.5 vs. H1: not H0. b1 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-0.5},fixed.strata="none") mph.summary(a1,T) mph.summary(b1,T) ############################################################################################## # # EXAMPLE 2. Test whether a multinomial probability vector is uniform. # Test whether a multinomial probability vector equals a specific value. # # y <- Y=(Y[1],...,Y[6]) ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial. # # In other symbols, # # y <- Y ~ multinomial(15, p=(p[1],...,p[6])) # # GOAL: Test H0: p[1]=p[2]=...=p[6] vs. H1: not H0. # y <- rmultinom(1,15,rep(1,6)) a1 <- mph.fit(y,L.fct = function(p){p}, X=matrix(1,6,1), y.eps=0.1) #Alternative specification... a2 <- mph.fit(y,h.fct = function(p){as.matrix(p[-6]-1/6)},y.eps=0.1) mph.summary(a1,T); mph.summary(a2,T) #Test whether p = (1, 2, 3, 1, 2, 3)/12... # p0 <- c(1,2,3,1,2,3)/12 b <- mph.fit(y,h.fct= function(p){as.matrix(p[-6]-p0[-6])},y.eps=0.1) mph.summary(b,T) ############################################################################################### # # EXAMPLE 3. Test whether a multinomial probability vector satisfies a particular constraint. # # Data Source: Agresti 25:2002. # # y=(30,63,63) <- Y ~ MP(nu, p| strata=1, fixed="all"); i.e. Y ~ multinomial. # # In other symbols, # # y=(30,63,63) <- Y ~ multinomial(156, p=(p[1],p[2],p[3])) # # GOAL: Test H0: p[1]+p[2] = p[1]/(p[1]+p[2]) vs. H1: not H0. # y <- c(30, 63, 63) h.fct <- function(p) { (p[1] + p[2]) - p[1]/(p[1]+p[2]) } a <- mph.fit(y, h.fct=h.fct) mph.summary(a,T) ############################################################################################## # # EXAMPLE 4. Test of Independence in a 2x2 Table. # # y=(y[1,1],y[1,2],y[2,1],y[2,2]) = (25,18,13,21) <- Y ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial # # In other symbols, # y =(y[1,1],y[1,2],y[2,1],y[2,2]) <- Y ~ multinomial(77, p=(p[1,1],p[1,2],p[2,1],p[2,2])) # # GOAL: Test H0: p[1,1]*p[2,2]/p[1,2]/p[2,1] = 1 vs. H1: not H0. d <- scan(what=list(A="",B="",count=0)) 1 1 25 1 2 18 2 1 13 2 2 21 d <- data.frame(d) a1 <- mph.fit(y=d$count, h.fct=function(p){log(p[1]*p[4]/p[2]/p[3])}) # Alternative specifications of independence.... a2 <- mph.fit(y=d$count, h.fct=function(p){p <- matrix(p,2,2,byrow=T); log(p[1,1]*p[2,2]/p[1,2]/p[2,1])}) a3 <- mph.fit(y=d$count, h.fct=function(p){p[1]*p[4]/p[2]/p[3] - 1}) a4 <- mph.fit(y=d$count, h.fct=function(p){p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4])}) a5 <- mph.fit(y=d$count, L.fct="logm", X=model.matrix(~A+B,data=d)) # Suppose we wished to output observed and fitted values of log OR, OR, and P(B=1|A=1)-P(B=1|A=2)... L.fct <- function(p) { L <- as.matrix(c( log(p[1]*p[4]/p[2]/p[3]), p[1]*p[4]/p[2]/p[3], p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4]) )) rownames(L) <- c("log OR","OR","P(B=1|A=1) - P(B=1|A=2)") L } a6 <- mph.fit(y=d$count,h.fct=function(p){log(p[1]*p[4]/p[2]/p[3])}, L.fct=L.fct,X=diag(3)) # Unrestricted Model... b <- mph.fit(y=d$count, L.fct=L.fct, X=diag(3)) mph.summary(a6,T) mph.summary(b,T) ############################################################################################## # # EXAMPLE 5. Test of Independence in a 4x4 Table. (Using Log-Linear Model.) # # Data Source: Table 2.8, Agresti, 57:2002. # # y <- Y ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial # # In other symbols, # y <- Y ~ multinomial(96, p=(p[1,1],p[1,2],p[2,1],p[2,2])) # # GOAL: Test H0: p[1,1]*p[2,2]/p[1,2]/p[2,1] = 1 vs. H1: not H0. # d <- scan(what=list(Income="",JobSatisf="",count=0)) <15 VD 1 <15 LD 3 <15 MS 10 <15 VS 6 15-25 VD 2 15-25 LD 3 15-25 MS 10 15-25 VS 7 25-40 VD 1 25-40 LD 6 25-40 MS 14 25-40 VS 12 >40 VD 0 >40 LD 1 >40 MS 9 >40 VS 11 d <- data.frame(d) a <- mph.fit(y=d$count, link="logp", X=model.matrix(~Income+JobSatisf,data=d)) mph.summary(a) #Alternatively, b <- mph.fit(y=d$count, link="logm", X=model.matrix(~Income+JobSatisf,data=d)) mph.summary(b) ############################################################################################### # # EXAMPLE 6. Test Marginal Homogeneity in a 3x3 Table # # Data Source: Table 10.16, Agresti, 445:2002. # # y <- Y ~ MP(nu, p |strata=1, fixed="all"); i.e. Y ~ multinomial. # # Specifically, # y <- Y ~ multinomial(160, p=(p[1,1],...,p[3,3])) # # GOAL: Test H0: p[1,+] = p[+,1], p[2,+] = p[+,2], p[3,+] = p[+,3] vs. H1: not H0. d <- scan(what=list(Siskel="",Ebert="",count=0)) Pro Pro 64 Pro Mixed 9 Pro Con 10 Mixed Pro 11 Mixed Mixed 13 Mixed Con 8 Con Pro 13 Con Mixed 8 Con Con 24 d <- data.frame(d) h.fct <- function(p){ p.Siskel <- M.fct(d$Siskel)%*%p p.Ebert <- M.fct(d$Ebert)%*%p as.matrix(c(p.Siskel[-3] - p.Ebert[-3])) } a1 <- mph.fit(y=d$count, h.fct=h.fct) mph.summary(a1,T) # Suppose that we wish to report on the observed and fitted marginal probabilities # L.fct <- function(p) { p.Siskel <- M.fct(d$Siskel)%*%p p.Ebert <- M.fct(d$Ebert)%*%p L <- as.matrix(c(p.Siskel,p.Ebert)) rownames(L) <- c(paste(sep="", "P(Siskel=",levels(d$Siskel),")"), paste(sep="", "P(Ebert=", levels(d$Ebert),")")) L } a2 <- mph.fit(y=d$count,h.fct=h.fct,L.fct=L.fct,X=diag(6)) mph.summary(a2,T) # M.fct(factor)%*%p gives the marginal probabilities corresponding to the # levels of 'factor'. The marginal probabilities are ordered by the levels of 'factor'. # # Alternatively, in this rectangular table setting, we can find the marginal # probabilities using the apply(...) function. In this case, the marginal # probabilities are ordered as they are entered in the data set. # h.fct <- function(p) { p <- matrix(p,3,3,byrow=T) p.Siskel <- apply(p,1,sum) p.Ebert <- apply(p,2,sum) as.matrix(c(p.Siskel[-3] - p.Ebert[-3])) } L.fct <- function(p) { p <- matrix(p,3,3,byrow=T) p.Siskel <- apply(p,1,sum) p.Ebert <- apply(p,2,sum) L <- as.matrix(c(p.Siskel,p.Ebert)) rownames(L) <- c("P(Siskel=Pro)","P(Siskel=Mixed)","P(Siskel=Con)", "P(Ebert=Pro)", "P(Ebert=Mixed)", "P(Ebert=Con)") L } b <- mph.fit(y=d$count,h.fct=h.fct,L.fct=L.fct,X=diag(6)) ################################################################################################## # # EXAMPLE 7. Log-linear Model for 2x2x2 Table # # Data Source: Table 8.16, Agresti 347:2002 # # y <- Y ~ MP(nu, p| strata=1, fixed="all"); i.e. Y ~ multinomial. # # Specifically, # # y <- Y ~ multinomial(621, p). # # The counts in y are cross-classification counts for variables # G=Gender, I=Information Opinion, H= Health Opinion. # # GOAL: Fit the loglinear models [GI, GH, IH] and [G, IH]. # d <- scan(what=list(G ="",I="",H="",count=0)) Male Support Support 76 Male Support Oppose 160 Male Oppose Support 6 Male Oppose Oppose 25 Female Support Support 114 Female Support Oppose 181 Female Oppose Support 11 Female Oppose Oppose 48 d <- data.frame(d) #Fit loglinear model [GI, GH, IH]... a1 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + G:I + G:H + I:H, data=d)) #Fit loglinear model [G, IH]... a2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H,data=d)) #Different Sampling Distribution Assumptions: # #Alternatively, assume # y <- Y ~ MP(nu, p| strata=1, fixed="none"); that is, Y ~ indep Poisson. # # In other symbols, # y <- Y, where Y[i] indep ~ Poisson(m[i] = nu p[i]). # Here, nu is the unknown expected sample size. b2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), fixed="none") #Alternatively, assume # y <- Y ~ MP(nu, p| strata=Gender, fixed="all"). That is, Y ~ prod multinomial. # # In other symbols, # y <- Y = (Y[1,1,1],Y[1,1,2],...,Y[2,2,2]), where (Y[i,1,1],...,Y[i,2,2]) indep ~ multinomial(n[i], p[i,,]). # Here, p[i,j,k] = P(I=j,H=k|G=i) and n[1]=267 and n[2]=354 are the a priori fixed # samples sizes for males and females. c2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), strata=d$G) #Alternatively, assume # y <- Y ~ MP(nu, p| strata=Gender, fixed="none"). That is, Y ~ prod Poisson. # # In other symbols, # y <- Y = (Y[1,1,1],Y[1,1,2],...,Y[2,2,2]), where Y[i,j,k] indep ~ Poisson(m[i,j,k] = nu[i] p[i,j,k]). # Here, p[i,j,k] = P(I=j, H=k| G=i) and nu[1] and nu[2] are the unknown expected # sample sizes for males and for females. d2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), strata=d$G, fixed="none") cbind(a2$m,b2$m,c2$m,d2$m, sqrt(diag(a2$covm)),sqrt(diag(b2$covm)),sqrt(diag(c2$covm)),sqrt(diag(d2$covm))) cbind(a2$p,b2$p,c2$p,d2$p, sqrt(diag(a2$covp)),sqrt(diag(b2$covp)),sqrt(diag(c2$covp)),sqrt(diag(d2$covp))) ########################################################################################################## # # EXAMPLE 8. Fit Linear-by-Linear Log-Linear Model # # Data Source: Table 8.15, Agresti, 345:2002 # # y <- Y ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial # # Specifically, # y <- Y ~ multinomial(1425, p) # # GOAL: Assess the fit of the linear-by-linear log-linear model. # d <- scan(what=list(Schooling="",Abortion = "", count=0)) <HS Disapprove 209 <HS Middle 101 <HS Approve 237 HS Disapprove 151 HS Middle 126 HS Approve 426 >HS Disapprove 16 >HS Middle 21 >HS Approve 138 Schooling.score <- -1*(d$Schooling=="<HS") + 0*(d$Schooling=="HS") + 1*(d$Schooling==">HS") Abortion.score <- -1*(d$Abortion=="Disapprove") + 0*(d$Abortion=="Middle") + 1*(d$Abortion=="Approve") d <- data.frame(d,Schooling.score,Abortion.score) a <- mph.fit(y=d$count, link="logm",X=model.matrix(~Schooling + Abortion + Schooling.score:Abortion.score,data=d)) mph.summary(a,T) ############################################################################################################ # # EXAMPLE 9. Marginal Standardization of a Contingency Table # # Data Source: Table 8.15, Agresti 345:2002. # # GOAL: # For a two-way table, find the standardized values of y, say y*, # that satisfy (i) y* has the same odds ratios as y and # (ii) y* has row and column totals equal to 100. # # Note: This is equivalent to the problem of finding the fitted # values for the following model... # x <- Y ~ multinomial(n, p=(p[1,1],...,p[3,3])) # p[1,+] = p[2,+] = p[3,+] = p[+,1] = p[+,2] = p[+,3] = 1/3 # p[1,1]*p[2,2]/p[2,1]/p[1,2] = or[1,1] # p[1,2]*p[2,3]/p[2,2]/p[1,3] = or[1,2] # p[2,1]*p[3,2]/p[3,1]/p[2,2] = or[2,1] # p[2,2]*p[3,3]/p[3,2]/p[2,3] = or[2,2], # where or[i,j] = y[i,j]*y[i+1,j+1]/y[i+1,j]/y[i,j+1] are # the observed (y) odds ratios. # If m is the vector of fitted values, then y* = m*300/sum(m) # are the standardized values of y. # Here x can be any vector of 9 counts. # Choosing x so that the sum is 300 leads to sum(m) = 300, so that # y* = m in this case. # # d <- scan(what=list(Schooling="",Abortion = "", count=0)) <HS Disapprove 209 <HS Middle 101 <HS Approve 237 HS Disapprove 151 HS Middle 126 HS Approve 426 >HS Disapprove 16 >HS Middle 21 >HS Approve 138 d <- data.frame(d) h.fct <- function(p) { p.Schooling <- M.fct(d$Schooling)%*%p p.Abortion <- M.fct(d$Abortion )%*%p p <- matrix(p,3,3,byrow=T) as.matrix(c( p.Schooling[-3]-1/3,p.Abortion[-3]-1/3, p[1,1]*p[2,2]/p[2,1]/p[1,2] - 209*126/151/101, p[1,2]*p[2,3]/p[2,2]/p[1,3] - 101*426/126/237, p[2,1]*p[3,2]/p[3,1]/p[2,2] - 151*21/16/126, p[2,2]*p[3,3]/p[3,2]/p[2,3] - 126*138/21/426 )) } b <- mph.fit(y=d$count,h.fct=h.fct) ystar <- b$m*300/sum(b$m) matrix(round(ystar,1),3,3,byrow=T) x <- c(rep(33,8),36) b <- mph.fit(y=x,h.fct=h.fct) ystar <- b$m matrix(round(ystar,1),3,3,byrow=T) ################################################################################################################ # # EXAMPLE 10. Cumulative Logit Model # # Data Source: Table 7.19, Agresti, 306:2002. # # y <- Y ~ MP(nu, p| strata=Therapy*Gender, fixed="all"); i.e. Y ~ prod multinomial # # Here, y[i,j,k] is the cross-classification count corresponding to # Therapy=i, Gender=j, Response=k. # # The table probabilities are defined as p[i,j,k] = P(Response=k|Therapy=i,Gender=j). # # Goal: Fit the cumulative logit proportional odds model that includes # the main effect of Therapy and Gender. # d <- scan(what=list(Therapy="",Gender="",Response="",count=0)) Sequential Male Progressive 28 Sequential Male NoChange 45 Sequential Male Partial 29 Sequential Male Complete 26 Sequential Female Progressive 4 Sequential Female NoChange 12 Sequential Female Partial 5 Sequential Female Complete 2 Alternating Male Progressive 41 Alternating Male NoChange 44 Alternating Male Partial 20 Alternating Male Complete 20 Alternating Female Progressive 12 Alternating Female NoChange 7 Alternating Female Partial 3 Alternating Female Complete 1 d <- data.frame(d) strata <- paste(sep="",d$Therapy,".",d$Gender) d <- data.frame(d,strata) d3 <- subset(d,Response!="Complete") levels(d3$Response) <- c(NA,"NoChange","Partial","Progressive") L.fct <- function(p) { p <- matrix(p,4,4,byrow=T) clogit <- c() for (s in 1:4) { clogit <- c(clogit, log(sum(p[s,1]) /sum(p[s,2:4])), log(sum(p[s,1:2])/sum(p[s,3:4])), log(sum(p[s,1:3])/sum(p[s,4])) ) } L <- as.matrix(clogit) rownames(L) <- c(paste(sep="","log odds(R < ",2:4,"|",d3$strata,")")) L } a <- mph.fit(d$count,link=L.fct,X=model.matrix(~ -1 + Response + Therapy + Gender,data=d3) ,strata=strata) # Fit the related non-proportional odds cumulative logit model b <- mph.fit(d$count,link=L.fct, X=model.matrix(~ Response+Response*Therapy + Response*Gender-1-Therapy-Gender,data=d3),strata=strata) mph.summary(a,T) mph.summary(b,T) ##########################################################################################################