Home‎ > ‎Schedule‎ > ‎

R commands for March 27

# Commands for example Multinomial Logit example (when there are multiple non-ordered outcome variables)
install.packages("mlogit")
library(mlogit)

#Converting data to appropriate format

etymology.mlogit = mlogit.data(etymology,shape="wide",choice="EtymAge")

# A (not terribly informative) model giving the log odds ratio for each outcome relative to Dutch (the default reference class) 
summary(mlogit(EtymAge ~ 1, data=etymology.mlogit))

# A model giving the log odds ratio for each outcome relative to IndoEuropean  
summary(mlogit(EtymAge ~ 1, data=etymology.mlogit,reflevel="IndoEuropean"))
# A model showing how the log odds of each outcome decreases as the number of words with a  shared stem increases
summary(mlogit(EtymAge ~ 1|NcountStem, data=etymology.mlogit))

# Commands for example Ordinal Logit example (when there are multiple ordered outcome variables)
library(Design)
#specifying ordering of variables
etymology$EtymAge = ordered(etymology$EtymAge, levels = c("Dutch","DutchGerman", "WestGermanic", "Germanic", "IndoEuropean"))
etymology.dd = datadist(etymology)
lrm(EtymAge ~ NcountStem,data=etymology)


#Interactions
library(rms)

data = data.frame(x=c(-7.4503855, 33.7717681, -7.9101377, -2.7971393, -2.2282866, 5.2334246, -3.5538627, -0.4625002, -2.0667265, 5.1264264, -3.4151379, -1.5276722, 8.3360963, -4.0415953, -9.5245320, -18.0249328, 11.5317049, 10.6761641, -16.4991934, 4.9495576, -0.9548466, -15.8105822, -0.4292703, 3.3068233, 2.4865054, 24.0185448, 10.0890220, 10.0918972, -1.5960759, -7.2818033, 4.9238007, 13.5079987, 11.0305050, 24.0398751, -10.1446549, 5.9537229, -6.4051657, 0.1638454, -17.0310884, -8.4645397, 11.2408045, 6.6361288, 4.4861939, 8.3807474, -13.1119015, 0.5620654, 29.7718527, -9.2402587, 1.0080807, 17.6459464, -8.2345416, -23.7369629, 9.4013625, 20.9770998, -0.1213083, 37.2883212, 1.3960429, 1.8369027, 6.5722008, 7.7998969, -29.6435218, -8.2564628, 8.5532488, -33.9487010, -28.5319618, -28.6999551, 33.3474762, 22.6708527, -7.6358080, 6.7703893, -12.1595420, 8.4512831, -3.2846951, -7.7961918, -2.5321185, 11.4361471, 5.1657681, 14.3654264, -13.4304761, 19.5653454, 17.6804044, -1.3214685, 12.8310225, 42.9705130, -5.4960750, 46.2539184, -6.3259302, 0.4485299, -1.6278171, -12.8814005, -11.9078218, 24.9932327, 0.4775661, -10.1086728, 9.5988199, -1.5437201, -13.4661716, 1.1671320, -6.3072374, 16.0633466), y= c(991.5183, 1027.9742,  989.5567,  959.1312, 1004.6476, 1003.3502, 1002.5241, 992.7737,  984.8252, 1000.3512, 1002.7763,  994.5810, 1012.2689, 1004.4084, 989.5628,  987.2725, 1013.5066,  999.7798,  985.3723, 993.5764, 1000.0530, 976.2671, 1006.9512, 997.2926, 1012.9543, 1007.7511, 1001.7950, 1013.5084, 978.5805,  986.3428,  991.3399, 1033.7036, 1041.0641, 1016.5342,  995.3308, 1018.5047, 1000.2473, 1005.7526,  988.0059,  969.3288, 1026.5463, 1013.2947, 997.3559, 1025.7909,  988.4578, 1003.8441, 1021.2364,  992.0261,  992.4964, 1014.3022,  999.0285,  976.8534, 1014.0519, 1012.5062, 1015.4847, 1022.7617, 991.4465, 1001.3647, 1012.1202,  999.8695,  979.6766,  974.4323, 1008.0540, 976.1013,  973.1441,  977.8663, 1025.1615, 1021.8148,  984.1896,  996.0043, 991.4497, 997.9412, 1008.3965,  995.4772,  978.0273, 1004.2221,  995.9249, 1032.2460,  983.2112, 1007.4135, 1016.9159, 996.3040, 1006.8056, 1032.1790, 991.9243, 1037.4083,  979.4405, 1009.1507, 1003.2604,  988.7266,  988.2310, 1013.0020, 1006.3662,  992.1800, 1000.6936, 1014.1251,  986.6540, 1010.1897, 1008.0944, 1010.2430) ,z  = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1) )


plot(data$x,data$y)
plot(data[data$z == 0,]$x,data[data$z == 0,]$y)
lines(data[data$z == 1,]$x,data[data$z == 1,]$y,type="p",pch=3)

summary(lm(y ~ x + z,data=data))
summary(lm(y ~ x*z,data=data))

# Model comparison [Functions for explanation only. You can use rms functions lrtest and AIC]
# An important distinction is made between nested models and non-nested models. One model M1 is said to be nested in another model M2, when the predictors in model M1 are a subset of the predictors in model M2. For example...

M1 = lrm(RealizationOfRecipient ~ LengthOfTheme,data=dative)
M2 = lrm(RealizationOfRecipient ~ LengthOfTheme + AnimacyOfRec,data=dative)

# Nested models should be compared using a likelihood ratio test. This works by taking twice the difference between the log likelihood of the two models. This gives us a test statistic. Such values are known to be chi-squared, and we can use a chi-squared test (with degrees of freedom equal to the difference between the two models in the number of predictors) to determine the probability of seeing a difference this or more extreme by chance. See below for definition of the function.

# When there is no such relationship, the models are said to be non-nested. For example.

M3 = lrm(RealizationOfRecipient ~ LengthOfTheme + LengthOfRecipient,data=dative)
M4 = lrm(RealizationOfRecipient ~ LengthOfTheme + AnimacyOfRec,data=dative)

#Non-nested models should be compared using AIC values. The AIC value for a model is twice the negative log likelihood of the model plus twice the number of parameters in the model.

#For nested models

LRtest <- function(model1,model2){
print(model1$dev[2] - model2$dev[2])
print(1 - pchisq((model1$dev[2] - model2$dev[2]),(model2$stats['d.f.'] - model1$stats['d.f.'])))
}

LRtest(M1,M2)

# For non-nested models
aic <- function(model) {
round(unname(model$dev[2] + 2*(model$stats['d.f.']+1)),2) 
}
aic(M1)

Comments