library(nlme)
library(lattice)

# This file shows lots of variations involving a single predictor or two predictors, but no interactions.
# Hopefully, one of these examples provides a good model.
# Be sure that you either place the command and data files in your working directory or CHANGE
# your working directory (see R menus). To check the current working directory, use "getwd()".

myd<-read.csv("Young E1 DelayDiscounting.dat")
summary(myd)

# Note, if a categorical variable is designated with numbers, R will assume it's continuous.
# If so, do something like this: myd$condition<-as.factor(myd$condition), before continuing.
# We'll need to do this for Cluster in a later example.

############################################################################################################
# For this data set, I have some very noisy data from a titration procedure. The variables include
# two between-subject predictors: sex (categorical) and video game experience (continuous).
# The data also include missing values for VG experience and some missing delays.
############################################################################################################

# I'll start with an analysis involving none of the predictors to help me find good starting values
mynls<-nls(Indifference.point~1000/(1+exp(logk)*Delay), data=myd, start=c(logk=-2))
summary(mynls)
# This suggests that -6 is a good starting value for logk.
# If you want to explore various starting values of k, you may experience some unhelpful "solutions."

# Fit the multilevel hyperbolic model
myr<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~1, random=logk~1|Subject, data=myd, start=c(logk=-6))
summary(myr)
hist(coef(myr)$logk)

# The following highlights the messiness of the original data and the quality of the fits
xyplot(fitted(myr)+Indifference.point~Delay|Subject, data=myd, type="a")

# Let's explore sex differences
myrSex<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Sex, random=logk~1|Subject, data=myd, start=c(logk=-6, 0))
summary(myrSex)
# Men had shallower discount rates (logk women = -5.73, men = -6.95), but the difference is not significant (p = .2066)

# Let's try a continuous predictor, VG experience
myrVG<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~VGExperience, random=logk~1|Subject, data=myd, start=c(logk=-6, 0))
# We get an error message - nlme doesn't like the missing values for this predictor, so I'll
# screen them out in the data statement.
myrVG<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~VGExperience, random=logk~1|Subject, data=subset(myd, !is.na(VGExperience)), start=c(logk=-6, 0))
summary(myrVG)
# Higher values of VG Experience produced steeper discounting (higher logk), but the effect wasn't significant (p = .1579)
# Note, just like in a regression, the intercept here (-7.5) corresponds to the logk when the other
# predictors are zero (in this case, no video game experience). We'll plot the relationship to observe it.
plot(function(vgexp) -7.502613+.124074*vgexp, 0, 21, xlab="Total VG Experience", ylab="Predicted logk")
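# (A hedged alternative sketch: pull the estimates from the fitted model with fixef() instead of
# hand-typing them, so the plot stays in sync if the model is refit. fixef(myrVG) should return
# the intercept and the VGExperience slope, in that order.)
fe<-fixef(myrVG)
plot(function(vgexp) fe[1]+fe[2]*vgexp, 0, 21, xlab="Total VG Experience", ylab="Predicted logk")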
# Note that if I try to plot the fits for the model with missing values, xyplot won't properly align the
# predictions with the rows.
xyplot(fitted(myrVG)+Indifference.point~Delay|Subject, data=myd, type="a")
# So, I need to screen out the missing values in the plot, too
xyplot(fitted(myrVG)+Indifference.point~Delay|Subject, data=subset(myd, !is.na(VGExperience)), type="a")

############################################################################################################
# Let's use Cluster as a predictor
############################################################################################################

# In the original experiment, I performed a cluster analysis of the TYPES of video game experience that
# each subject self-reported. I identified three clusters. This is a categorical variable, so...
myd$Cluster<-as.factor(myd$Cluster)
# One subject doesn't have a cluster, so I'll have to watch for those missing values.
# The following command shows the default dummy coding R will use for this variable.
contrasts(myd$Cluster)

myrCluster<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Cluster, random=logk~1|Subject, data=subset(myd, !is.na(Cluster)), start=c(logk=-6, 0))
# Starting values aren't the correct length? Oh, my three-level categorical variable needs two dummy
# variables to be estimated in addition to the intercept.
myrCluster<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Cluster, random=logk~1|Subject, data=subset(myd, !is.na(Cluster)), start=c(logk=-6, 0, 0))
summary(myrCluster)
# The intercept estimates the logk for Cluster 1 (both dummy variables at 0), the intercept plus the
# Cluster 2 dummy weight estimates the logk for Cluster 2, and the intercept plus the Cluster 3 dummy
# weight estimates the logk for Cluster 3. Although there are no differences, let's plot them anyway.
xyplot(fitted(myrCluster)+Indifference.point~Delay|Cluster, group=Subject, data=subset(myd, !is.na(Cluster)), type="a")
# The second and third p-values correspond to the differences between Clusters 2 and 1 and between
# Clusters 3 and 1, respectively. The mean and SE for Cluster 1 are in the results summary. To obtain
# means and SEs for the second and third clusters, you'll need the multcomp package.
library(multcomp)
contrast.matrix <- rbind(
  'Cluster 2' = c(1, 1, 0),
  'Cluster 3' = c(1, 0, 1))
comps <- glht(myrCluster, contrast.matrix)
summary(comps)
# Note that the myrCluster summary suggests that Cluster 2's logk (-7.39) is lower than Cluster 1's (-5.29)

# The following command tests all of the comparisons to produce an omnibus F instead of two t values.
anova(myrCluster)
# ...which suggests that there are no differences among the clusters.
# Note that the anova command here returns Type I sums of squares, and this usually isn't what you want
# when you have multiple predictors. However, it suffices here. If you want Type III sums of squares,
# you can use anova(myrCluster, type="marginal"), but here it doesn't matter.

# If we wanted to use two of these predictors at the same time, but not their interaction, it would work like this
myrMult<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Cluster+Sex, random=logk~1|Subject, data=subset(myd, !is.na(Cluster)), start=c(logk=-6, 0, 0, 0))
# Note that I had to change the starting values: I needed one for the intercept, two for the Cluster
# dummies, and one for the Sex dummy.
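# (A hedged sketch: if you're unsure how many starting values a fixed formula requires, the columns of
# the corresponding model matrix show one fixed-effect coefficient per column, the intercept plus the
# dummies, in the order the model summary will report them.)
colnames(model.matrix(~Cluster+Sex, data=subset(myd, !is.na(Cluster))))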
summary(myrMult)
# A whole lot of nothing, but I like effect coding when multiple predictors are used.
contrasts(myd$Sex)=contr.sum(2)
contrasts(myd$Cluster)=contr.sum(3)
myrMult<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Cluster+Sex, random=logk~1|Subject, data=subset(myd, !is.na(Cluster)), start=c(logk=-6, 0, 0, 0))
summary(myrMult)
anova(myrMult, type="marginal")
# Here, the intercept, -6.4, corresponds to the estimate of the "average" subject because
# Sex=0 now represents halfway between male and female, and Cluster1=0 and Cluster2=0 represent the
# "average" across clusters. To see the effect coding (i.e., which numbers to plug into the
# resulting regression equation):
contrasts(myd$Sex)
contrasts(myd$Cluster)

# To compare all of my models
AIC(myr, myrSex, myrVG, myrCluster, myrMult)
# Note the warning! AIC can only be compared for models based on the same data, and some of these
# models had to screen out rows with missing data. Look at the "Number of Observations" in each
# model summary. Thus, we can only compare myr to myrSex, and myrCluster to myrMult. If you wanted to
# compare all of them, you'd need to refit each model to the same data, either by screening out the same
# NAs in every "data=" statement or by building a new data set. I'm lazy, so I'll do the latter:
newdata<-subset(myd, !is.na(VGExperience)&!is.na(Cluster))
myr<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~1, random=logk~1|Subject, data=newdata, start=c(logk=-6))
myrSex<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Sex, random=logk~1|Subject, data=newdata, start=c(logk=-6, 0))
myrCluster<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Cluster, random=logk~1|Subject, data=newdata, start=c(logk=-6, 0, 0))
myrVG<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~VGExperience, random=logk~1|Subject, data=newdata, start=c(logk=-6, 0))
myrMult<-nlme(Indifference.point~1000/(1+exp(logk)*Delay), fixed=logk~Cluster+Sex, random=logk~1|Subject, data=newdata, start=c(logk=-6, 0, 0, 0))
AIC(myr, myrSex, myrVG, myrCluster, myrMult)
# All five provided similar fits, with the simplest model (no predictors) being best (lowest AIC).
# Note that AIC differences less than 2 are meaningless, and differences less than about 5 represent
# modest evidence in favor of the model with the lowest AIC. See Wagenmakers & Farrell, 2004, PB&R.
# (A short sketch of the delta-AIC and Akaike-weight calculations appears at the end of this file.)
# Generally speaking, if the model you choose can be refit to more data (i.e., without screening
# out NAs for a variable that is not part of that model), then you should do so.

# None of these examples involved within-subject variables, but they are easily added as random
# effects. See Young (2016) for the syntax.

###################################################################################################
# If you want to output the coefficients for graphing in other software, the following demonstrates
# doing so for one of our models.
###################################################################################################
write.csv(coef(myrSex), "filename.txt")
# Note that this file does not include the Sex of each individual, so you need that information to
# determine when to add the Sex fixed effect adjustment. There is likely an easier way to do this,
# but the following will work. If you discover an easier way, please email me!
jill<-aggregate(myd$Sex, by=list(myd$Subject), FUN=function(x) unique(x))
jack<-coef(myrSex)
jack$Subject<-rownames(coef(myrSex))
mytemp<-merge(jack, jill, by.x="Subject", by.y="Group.1")
write.csv(mytemp, "filename.txt")
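# (A hedged alternative sketch for the same export: pull each subject's Sex with unique() on the two
# relevant columns instead of aggregate(). It assumes each Subject has exactly one Sex value in myd,
# as above.)
subjSex<-unique(myd[, c("Subject", "Sex")])
jack<-coef(myrSex)
jack$Subject<-rownames(jack)
write.csv(merge(jack, subjSex, by="Subject"), "filename.txt")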
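# (Referenced in the model-comparison section above: a hedged sketch of the delta-AIC and Akaike-weight
# calculations described by Wagenmakers & Farrell (2004). It assumes the five models have been refit to
# the same data, as was done with newdata above; the object name mycomp is just illustrative.)
mycomp<-AIC(myr, myrSex, myrVG, myrCluster, myrMult)
mycomp$deltaAIC<-mycomp$AIC-min(mycomp$AIC)                          # 0 for the best model
mycomp$weight<-exp(-mycomp$deltaAIC/2)/sum(exp(-mycomp$deltaAIC/2))  # Akaike weights sum to 1
round(mycomp, 3)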