# This file demonstrates fitting three types of discounting models to one group of subjects library(nlme) # loads the library used for nonlinear multilevel modeling commands library(lattice) # loads the library that includes xyplot # R will look for the data in the working directory. If your file isn't there, you either need to # move it there (use "getwd()" to find out what it is) or change the working directory (it's in your menu) ############################################################################################################ # I'm assuming that your data is in the long format, in comma-delimited form, and has no missing values. # If you have missing values, you may need to use na.strings in the read.csv call. ############################################################################################################ myd<-read.csv("Example one group data.dat") # look at a summary of the data and plot the individual subject data summary(myd) # as.factor treats the variable as categorical; the "|" creates separate plots for each subject; # type="l" means that it plot a connected line xyplot(Value~Delay | as.factor(Subject), data=myd, type="l") # We'll use a simple nls to estimate a good starting value for logk # This should not be used for repeated measures analysis because nls assumes each measure is independent simp<-nls(Value~500/(1+exp(logk)*Delay), data=myd, start=c(logk=-2)) summary(simp) # summarizes the model we just ran; I'll use -4 as the starting value in multilevel model # Let's dive right in and fit a hyperbolic to the data. Note that the immediate amount was 500. myr<-nlme(Value~500/(1+exp(logk)*Delay), fixed=logk~1, random=logk~1|Subject, data=myd, start=c(logk=-4)) # Note, if you have convergence issues, you might try the following use of 'tol' and choose larger values # myr<-nlme(Value~500/(1+exp(logk)*Delay), fixed=logk~1, random=logk~1|Subject, data=myd, start=c(logk=-4), control=c(nlmeControl(tol=.1))) # Let's look at the results multiple ways summary(myr) coef(myr) # This shows the best fitting logk for each subject hist(coef(myr)$logk) # This plots a histogram of those values hist(resid(myr)) # This plots a histogram of the residuals; should be normal-ish # Now, let's look at our fits superimposed on the data # Note that xyplot won't handle missing data well, so if data are missing, you'll need to exclude them # using something like "data=subset(myd, !is.na(Value))" in place of "data=myd". "!" is NOT xyplot(fitted(myr)+Value~Delay|as.factor(Subject), data=myd, type=c("a", "p"), distribute.type=T) ############################################################################################################ # HYPERBOLOIDS # Okay, let's try the Green-Myerson and Rachlin models and allow k and s to vary across subjects ############################################################################################################ myrGM<-nlme(Value~500/(1+exp(logk)*Delay)^s, fixed=logk+s~1, random=logk+s~1|Subject, data=myd, start=c(logk=-4, s=1)) myrRach<-nlme(Value~500/(1+exp(logk)*Delay^s), fixed=logk+s~1, random=logk+s~1|Subject, data=myd, start=c(logk=-4, s=1)) # You can look at summaries of both using "summary" command. I'm going to look at the AIC for each of them AIC(myr, myrGM, myrRach) # Remember, lower is better. One is much worse - surprising. # Let's plot all three on the data and use different line types ("lty") xyplot(fitted(myr)+fitted(myrGM)+fitted(myrRach)+Value~Delay|as.factor(Subject), data=myd, type=c("a", "a", "a", "p"), distribute.type=T, lty=c(1, 2, 3, 4), auto.key=T) # When you look at the coef of myrGM, you'll discover a problem. We could try different starting values, # but I tried that and it didn't help. Instead, I didn't let s vary across subjects and got a much better fit. myrGM<-nlme(Value~500/(1+exp(logk)*Delay)^s, fixed=logk+s~1, random=logk~1|Subject, data=myd, start=c(logk=-4, s=1)) AIC(myr, myrGM, myrRach) # Now, Green-Myerson beats hyperbolic as expected # Plot again... much better! xyplot(fitted(myr)+fitted(myrGM)+fitted(myrRach)+Value~Delay|as.factor(Subject), data=myd, type=c("a", "a", "a", "p"), distribute.type=T, lty=c(1, 2, 3, 4), auto.key=T) # Let's compare the fits; note how well-behaved Green-Myerson is if s isn't allowed to vary. par(mfrow=c(1,2)) plot(coef(myrGM)$logk~coef(myr)$logk, ylab="GM Hyperboloid log(k)", xlab="Hyperbolic log(k)", ylim=c(-7,3)) plot(function(x) x, -10,10, add=T, lty=2) plot(coef(myrRach)$logk~coef(myr)$logk, ylab="Rachlin Hyperboloid log(k)", xlab="Hyperbolic log(k)", ylim=c(-7,3)) plot(function(x) x, -10,10, add=T, lty=2) # The following does the same plot but with the size of the circle indicating the value of "s" symbols(coef(myr)$logk, coef(myrGM)$logk,circles=coef(myrGM)$s, inches=1/10, ylab="GM Hyperboloid log(k)", xlab="Hyperbolic log(k)", ylim=c(-7,3)) plot(function(x) x, -10,10, add=T, lty=2) # I had to add .25 to the circle size for Rachlin because this fit produced a negative s for one subject symbols(coef(myr)$logk, coef(myrRach)$logk,circles=coef(myrRach)$s+.25, inches=1/10, ylab="Rachlin Hyperboloid log(k)", xlab="Hyperbolic log(k)", ylim=c(-7,3)) plot(function(x) x, -10,10, add=T, lty=2) # BTW, after a careful search, I could get GM to fit with a starting value for k of 3.7 or 3.8, # but the fit was worse than when s was fixed. I could also get variation when k started at 1.4, s was .5 # and I relaxed the error tolerance. The fit was good, but s was mimicking k. # Bottom line is that Green-Myerson was proving hard to fit for this data set when k and s both allowed to vary. ############################################################################################################ # To do individual fits using nlsList, see the following example for the hyperbolic; it's not recommended for hyperboloids ############################################################################################################ myrList<-nlsList(Value~500/(1+exp(logk)*Delay) | Subject, data=myd, start=c(logk=-4)) summary(myrList) # shows the individual estimates, the SE, t, and p-value sapply(myrList,AIC) # if you want the individual AIC values; not surprisngly, Subject 1 had the worst fit. # As an aside, the exp(logk) trick wasn't necessary for the last analysis - you could estimate k directly for nlsList