Home > Basic Numerical Examples
Just the Basics: Using mph.fit to fit standard contingency table models
Author: Joseph B. Lang, Department of Statistics and Actuarial Science, University of Iowa, Iowa City, IA USA 52245 (Last Updated: 5/22/09)
The numerical examples below were fitted using mph.fit, VERSION 3.1. This document only includes input, not output.
For another collection of less standard examples, click here.
Primary References:
EXAMPLE 1. Test whether a binomial probability equals 0.5.Lang, J.B (2004). "Multinomial-Poisson Homogeneous Models for Contingency Tables," Annals of Statistics, 32, 340-383 .
Lang, J.B. (2005). "Homogeneous Linear Predictor Models for Contingency Tables," JASA, 100, 121-134
See also, other on-line documentation starting at www.stat.uiowa.edu/~jblang/mph.fitting/
#############################################################################################
#
# EXAMPLE 1. Test whether a binomial probability equals 0.5
#
# y=(15, 22) <- Y ~ MP(nu,p|strata=1,fixed="all"); i.e. Y ~ multinomial
#
# In other symbols,
#
# y=(15, 22) <- Y=(Y[1], Y[2]) ~ multinomial(37, p=(p[1],p[2])).
#
# GOAL: Test H0: p[1] = 0.5 vs. H1: not H0.
a1 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-0.5})
#Alternative specifications...
a2 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-p[2]})
a3 <- mph.fit(y=c(15, 22), constraint=function(p){log(p[1]/p[2])})
a4 <- mph.fit(y=c(15, 22), constraint=function(m){m[1]-m[2]},h.mean=T)
a5 <- mph.fit(y=c(15, 22), link=function(p){p}, X=matrix(1,2,1))
a6 <- mph.fit(y=c(15, 22), link="logm",X=matrix(1,2,1))
#
# Alternatively, assume that
#
# y=(15, 22) <- Y ~ MP(nu, p|strata=1,fixed="none"); i.e. Y ~ indep Poisson.
#
# In other symbols,
# y=(15, 22) <- Y = (Y[1],Y[2]), where Y[i] indep ~ Poisson(nu p[i]), i=1,2.
#
# GOAL: Test H0: p[1] = 0.5 vs. H1: not H0.
b1 <- mph.fit(y=c(15, 22), constraint=function(p){p[1]-0.5},fixed.strata="none")
mph.summary(a1,T)
mph.summary(b1,T)
##############################################################################################
#
# EXAMPLE 2. Test whether a multinomial probability vector is uniform.
# Test whether a multinomial probability vector equals a specific value.
#
# y <- Y=(Y[1],...,Y[6]) ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial.
#
# In other symbols,
#
# y <- Y ~ multinomial(15, p=(p[1],...,p[6]))
#
# GOAL: Test H0: p[1]=p[2]=...=p[6] vs. H1: not H0.
#
y <- rmultinom(1,15,rep(1,6))
a1 <- mph.fit(y,L.fct = function(p){p}, X=matrix(1,6,1), y.eps=0.1)
#Alternative specification...
a2 <- mph.fit(y,h.fct = function(p){as.matrix(p[-6]-1/6)},y.eps=0.1)
mph.summary(a1,T); mph.summary(a2,T)
#Test whether p = (1, 2, 3, 1, 2, 3)/12...
#
p0 <- c(1,2,3,1,2,3)/12
b <- mph.fit(y,h.fct= function(p){as.matrix(p[-6]-p0[-6])},y.eps=0.1)
mph.summary(b,T)
###############################################################################################
#
# EXAMPLE 3. Test whether a multinomial probability vector satisfies a particular constraint.
#
# Data Source: Agresti 25:2002.
#
# y=(30,63,63) <- Y ~ MP(nu, p| strata=1, fixed="all"); i.e. Y ~ multinomial.
#
# In other symbols,
#
# y=(30,63,63) <- Y ~ multinomial(156, p=(p[1],p[2],p[3]))
#
# GOAL: Test H0: p[1]+p[2] = p[1]/(p[1]+p[2]) vs. H1: not H0.
#
y <- c(30, 63, 63)
h.fct <- function(p) {
(p[1] + p[2]) - p[1]/(p[1]+p[2])
}
a <- mph.fit(y, h.fct=h.fct)
mph.summary(a,T)
##############################################################################################
#
# EXAMPLE 4. Test of Independence in a 2x2 Table.
#
# y=(y[1,1],y[1,2],y[2,1],y[2,2]) = (25,18,13,21) <- Y ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial
#
# In other symbols,
# y =(y[1,1],y[1,2],y[2,1],y[2,2]) <- Y ~ multinomial(77, p=(p[1,1],p[1,2],p[2,1],p[2,2]))
#
# GOAL: Test H0: p[1,1]*p[2,2]/p[1,2]/p[2,1] = 1 vs. H1: not H0.
d <- scan(what=list(A="",B="",count=0))
1 1 25
1 2 18
2 1 13
2 2 21
d <- data.frame(d)
a1 <- mph.fit(y=d$count, h.fct=function(p){log(p[1]*p[4]/p[2]/p[3])})
# Alternative specifications of independence....
a2 <- mph.fit(y=d$count, h.fct=function(p){p <- matrix(p,2,2,byrow=T); log(p[1,1]*p[2,2]/p[1,2]/p[2,1])})
a3 <- mph.fit(y=d$count, h.fct=function(p){p[1]*p[4]/p[2]/p[3] - 1})
a4 <- mph.fit(y=d$count, h.fct=function(p){p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4])})
a5 <- mph.fit(y=d$count, L.fct="logm", X=model.matrix(~A+B,data=d))
# Suppose we wished to output observed and fitted values of log OR, OR, and P(B=1|A=1)-P(B=1|A=2)...
L.fct <- function(p) {
L <- as.matrix(c(
log(p[1]*p[4]/p[2]/p[3]),
p[1]*p[4]/p[2]/p[3],
p[1]/(p[1]+p[2]) - p[3]/(p[3]+p[4])
))
rownames(L) <- c("log OR","OR","P(B=1|A=1) - P(B=1|A=2)")
L
}
a6 <- mph.fit(y=d$count,h.fct=function(p){log(p[1]*p[4]/p[2]/p[3])}, L.fct=L.fct,X=diag(3))
# Unrestricted Model...
b <- mph.fit(y=d$count, L.fct=L.fct, X=diag(3))
mph.summary(a6,T)
mph.summary(b,T)
##############################################################################################
#
# EXAMPLE 5. Test of Independence in a 4x4 Table. (Using Log-Linear Model.)
#
# Data Source: Table 2.8, Agresti, 57:2002.
#
# y <- Y ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial
#
# In other symbols,
# y <- Y ~ multinomial(96, p=(p[1,1],p[1,2],p[2,1],p[2,2]))
#
# GOAL: Test H0: p[1,1]*p[2,2]/p[1,2]/p[2,1] = 1 vs. H1: not H0.
#
d <- scan(what=list(Income="",JobSatisf="",count=0))
<15 VD 1
<15 LD 3
<15 MS 10
<15 VS 6
15-25 VD 2
15-25 LD 3
15-25 MS 10
15-25 VS 7
25-40 VD 1
25-40 LD 6
25-40 MS 14
25-40 VS 12
>40 VD 0
>40 LD 1
>40 MS 9
>40 VS 11
d <- data.frame(d)
a <- mph.fit(y=d$count, link="logp", X=model.matrix(~Income+JobSatisf,data=d))
mph.summary(a)
#Alternatively,
b <- mph.fit(y=d$count, link="logm", X=model.matrix(~Income+JobSatisf,data=d))
mph.summary(b)
###############################################################################################
#
# EXAMPLE 6. Test Marginal Homogeneity in a 3x3 Table
#
# Data Source: Table 10.16, Agresti, 445:2002.
#
# y <- Y ~ MP(nu, p |strata=1, fixed="all"); i.e. Y ~ multinomial.
#
# Specifically,
# y <- Y ~ multinomial(160, p=(p[1,1],...,p[3,3]))
#
# GOAL: Test H0: p[1,+] = p[+,1], p[2,+] = p[+,2], p[3,+] = p[+,3] vs. H1: not H0.
d <- scan(what=list(Siskel="",Ebert="",count=0))
Pro Pro 64
Pro Mixed 9
Pro Con 10
Mixed Pro 11
Mixed Mixed 13
Mixed Con 8
Con Pro 13
Con Mixed 8
Con Con 24
d <- data.frame(d)
h.fct <- function(p){
p.Siskel <- M.fct(d$Siskel)%*%p
p.Ebert <- M.fct(d$Ebert)%*%p
as.matrix(c(p.Siskel[-3] - p.Ebert[-3]))
}
a1 <- mph.fit(y=d$count, h.fct=h.fct)
mph.summary(a1,T)
# Suppose that we wish to report on the observed and fitted marginal probabilities
#
L.fct <- function(p) {
p.Siskel <- M.fct(d$Siskel)%*%p
p.Ebert <- M.fct(d$Ebert)%*%p
L <- as.matrix(c(p.Siskel,p.Ebert))
rownames(L) <- c(paste(sep="", "P(Siskel=",levels(d$Siskel),")"),
paste(sep="", "P(Ebert=", levels(d$Ebert),")"))
L
}
a2 <- mph.fit(y=d$count,h.fct=h.fct,L.fct=L.fct,X=diag(6))
mph.summary(a2,T)
# M.fct(factor)%*%p gives the marginal probabilities corresponding to the
# levels of 'factor'. The marginal probabilities are ordered by the levels of 'factor'.
#
# Alternatively, in this rectangular table setting, we can find the marginal
# probabilities using the apply(...) function. In this case, the marginal
# probabilities are ordered as they are entered in the data set.
#
h.fct <- function(p) {
p <- matrix(p,3,3,byrow=T)
p.Siskel <- apply(p,1,sum)
p.Ebert <- apply(p,2,sum)
as.matrix(c(p.Siskel[-3] - p.Ebert[-3]))
}
L.fct <- function(p) {
p <- matrix(p,3,3,byrow=T)
p.Siskel <- apply(p,1,sum)
p.Ebert <- apply(p,2,sum)
L <- as.matrix(c(p.Siskel,p.Ebert))
rownames(L) <- c("P(Siskel=Pro)","P(Siskel=Mixed)","P(Siskel=Con)",
"P(Ebert=Pro)", "P(Ebert=Mixed)", "P(Ebert=Con)")
L
}
b <- mph.fit(y=d$count,h.fct=h.fct,L.fct=L.fct,X=diag(6))
##################################################################################################
#
# EXAMPLE 7. Log-linear Model for 2x2x2 Table
#
# Data Source: Table 8.16, Agresti 347:2002
#
# y <- Y ~ MP(nu, p| strata=1, fixed="all"); i.e. Y ~ multinomial.
#
# Specifically,
#
# y <- Y ~ multinomial(621, p).
#
# The counts in y are cross-classification counts for variables
# G=Gender, I=Information Opinion, H= Health Opinion.
#
# GOAL: Fit the loglinear models [GI, GH, IH] and [G, IH].
#
d <- scan(what=list(G ="",I="",H="",count=0))
Male Support Support 76
Male Support Oppose 160
Male Oppose Support 6
Male Oppose Oppose 25
Female Support Support 114
Female Support Oppose 181
Female Oppose Support 11
Female Oppose Oppose 48
d <- data.frame(d)
#Fit loglinear model [GI, GH, IH]...
a1 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + G:I + G:H + I:H, data=d))
#Fit loglinear model [G, IH]...
a2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H,data=d))
#Different Sampling Distribution Assumptions:
#
#Alternatively, assume
# y <- Y ~ MP(nu, p| strata=1, fixed="none"); that is, Y ~ indep Poisson.
#
# In other symbols,
# y <- Y, where Y[i] indep ~ Poisson(m[i] = nu p[i]).
# Here, nu is the unknown expected sample size.
b2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), fixed="none")
#Alternatively, assume
# y <- Y ~ MP(nu, p| strata=Gender, fixed="all"). That is, Y ~ prod multinomial.
#
# In other symbols,
# y <- Y = (Y[1,1,1],Y[1,1,2],...,Y[2,2,2]), where (Y[i,1,1],...,Y[i,2,2]) indep ~ multinomial(n[i], p[i,,]).
# Here, p[i,j,k] = P(I=j,H=k|G=i) and n[1]=267 and n[2]=354 are the a priori fixed
# samples sizes for males and females.
c2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), strata=d$G)
#Alternatively, assume
# y <- Y ~ MP(nu, p| strata=Gender, fixed="none"). That is, Y ~ prod Poisson.
#
# In other symbols,
# y <- Y = (Y[1,1,1],Y[1,1,2],...,Y[2,2,2]), where Y[i,j,k] indep ~ Poisson(m[i,j,k] = nu[i] p[i,j,k]).
# Here, p[i,j,k] = P(I=j, H=k| G=i) and nu[1] and nu[2] are the unknown expected
# sample sizes for males and for females.
d2 <- mph.fit(y=d$count, link="logm", X=model.matrix(~G+I+H + I:H, data=d), strata=d$G, fixed="none")
cbind(a2$m,b2$m,c2$m,d2$m, sqrt(diag(a2$covm)),sqrt(diag(b2$covm)),sqrt(diag(c2$covm)),sqrt(diag(d2$covm)))
cbind(a2$p,b2$p,c2$p,d2$p, sqrt(diag(a2$covp)),sqrt(diag(b2$covp)),sqrt(diag(c2$covp)),sqrt(diag(d2$covp)))
##########################################################################################################
#
# EXAMPLE 8. Fit Linear-by-Linear Log-Linear Model
#
# Data Source: Table 8.15, Agresti, 345:2002
#
# y <- Y ~ MP(nu, p|strata=1, fixed="all"); i.e. Y ~ multinomial
#
# Specifically,
# y <- Y ~ multinomial(1425, p)
#
# GOAL: Assess the fit of the linear-by-linear log-linear model.
#
d <- scan(what=list(Schooling="",Abortion = "", count=0))
<HS Disapprove 209
<HS Middle 101
<HS Approve 237
HS Disapprove 151
HS Middle 126
HS Approve 426
>HS Disapprove 16
>HS Middle 21
>HS Approve 138
Schooling.score <- -1*(d$Schooling=="<HS") + 0*(d$Schooling=="HS") + 1*(d$Schooling==">HS")
Abortion.score <- -1*(d$Abortion=="Disapprove") + 0*(d$Abortion=="Middle") + 1*(d$Abortion=="Approve")
d <- data.frame(d,Schooling.score,Abortion.score)
a <- mph.fit(y=d$count, link="logm",X=model.matrix(~Schooling + Abortion + Schooling.score:Abortion.score,data=d))
mph.summary(a,T)
############################################################################################################
#
# EXAMPLE 9. Marginal Standardization of a Contingency Table
#
# Data Source: Table 8.15, Agresti 345:2002.
#
# GOAL:
# For a two-way table, find the standardized values of y, say y*,
# that satisfy (i) y* has the same odds ratios as y and
# (ii) y* has row and column totals equal to 100.
#
# Note: This is equivalent to the problem of finding the fitted
# values for the following model...
# x <- Y ~ multinomial(n, p=(p[1,1],...,p[3,3]))
# p[1,+] = p[2,+] = p[3,+] = p[+,1] = p[+,2] = p[+,3] = 1/3
# p[1,1]*p[2,2]/p[2,1]/p[1,2] = or[1,1]
# p[1,2]*p[2,3]/p[2,2]/p[1,3] = or[1,2]
# p[2,1]*p[3,2]/p[3,1]/p[2,2] = or[2,1]
# p[2,2]*p[3,3]/p[3,2]/p[2,3] = or[2,2],
# where or[i,j] = y[i,j]*y[i+1,j+1]/y[i+1,j]/y[i,j+1] are
# the observed (y) odds ratios.
# If m is the vector of fitted values, then y* = m*300/sum(m)
# are the standardized values of y.
# Here x can be any vector of 9 counts.
# Choosing x so that the sum is 300 leads to sum(m) = 300, so that
# y* = m in this case.
#
#
d <- scan(what=list(Schooling="",Abortion = "", count=0))
<HS Disapprove 209
<HS Middle 101
<HS Approve 237
HS Disapprove 151
HS Middle 126
HS Approve 426
>HS Disapprove 16
>HS Middle 21
>HS Approve 138
d <- data.frame(d)
h.fct <- function(p) {
p.Schooling <- M.fct(d$Schooling)%*%p
p.Abortion <- M.fct(d$Abortion )%*%p
p <- matrix(p,3,3,byrow=T)
as.matrix(c(
p.Schooling[-3]-1/3,p.Abortion[-3]-1/3,
p[1,1]*p[2,2]/p[2,1]/p[1,2] - 209*126/151/101,
p[1,2]*p[2,3]/p[2,2]/p[1,3] - 101*426/126/237,
p[2,1]*p[3,2]/p[3,1]/p[2,2] - 151*21/16/126,
p[2,2]*p[3,3]/p[3,2]/p[2,3] - 126*138/21/426
))
}
b <- mph.fit(y=d$count,h.fct=h.fct)
ystar <- b$m*300/sum(b$m)
matrix(round(ystar,1),3,3,byrow=T)
x <- c(rep(33,8),36)
b <- mph.fit(y=x,h.fct=h.fct)
ystar <- b$m
matrix(round(ystar,1),3,3,byrow=T)
################################################################################################################
#
# EXAMPLE 10. Cumulative Logit Model
#
# Data Source: Table 7.19, Agresti, 306:2002.
#
# y <- Y ~ MP(nu, p| strata=Therapy*Gender, fixed="all"); i.e. Y ~ prod multinomial
#
# Here, y[i,j,k] is the cross-classification count corresponding to
# Therapy=i, Gender=j, Response=k.
#
# The table probabilities are defined as p[i,j,k] = P(Response=k|Therapy=i,Gender=j).
#
# Goal: Fit the cumulative logit proportional odds model that includes
# the main effect of Therapy and Gender.
#
d <- scan(what=list(Therapy="",Gender="",Response="",count=0))
Sequential Male Progressive 28
Sequential Male NoChange 45
Sequential Male Partial 29
Sequential Male Complete 26
Sequential Female Progressive 4
Sequential Female NoChange 12
Sequential Female Partial 5
Sequential Female Complete 2
Alternating Male Progressive 41
Alternating Male NoChange 44
Alternating Male Partial 20
Alternating Male Complete 20
Alternating Female Progressive 12
Alternating Female NoChange 7
Alternating Female Partial 3
Alternating Female Complete 1
d <- data.frame(d)
strata <- paste(sep="",d$Therapy,".",d$Gender)
d <- data.frame(d,strata)
d3 <- subset(d,Response!="Complete")
levels(d3$Response) <- c(NA,"NoChange","Partial","Progressive")
L.fct <- function(p) {
p <- matrix(p,4,4,byrow=T)
clogit <- c()
for (s in 1:4) {
clogit <- c(clogit,
log(sum(p[s,1]) /sum(p[s,2:4])),
log(sum(p[s,1:2])/sum(p[s,3:4])),
log(sum(p[s,1:3])/sum(p[s,4]))
)
}
L <- as.matrix(clogit)
rownames(L) <- c(paste(sep="","log odds(R < ",2:4,"|",d3$strata,")"))
L
}
a <- mph.fit(d$count,link=L.fct,X=model.matrix(~ -1 + Response + Therapy + Gender,data=d3) ,strata=strata)
# Fit the related non-proportional odds cumulative logit model
b <- mph.fit(d$count,link=L.fct,
X=model.matrix(~ Response+Response*Therapy + Response*Gender-1-Therapy-Gender,data=d3),strata=strata)
mph.summary(a,T)
mph.summary(b,T)
##########################################################################################################