### Nonlinear Least Squares Fitting of Selection Curves, including the Peak-Logistic ### J.A.Smith & M.D. Taylor 2014 (The University of NSW) ### Version 1.0, 08.01.2014; made using R console v3.0.2 ### Visit www.famer.unsw.edu.au/software.html for updates ##Housekeeping rm(list=ls(all=TRUE)) graphics.off() setwd("C:/R_scripts/PeakLogistic") #for example #install.packages("MASS") library(MASS) ##LOAD data mydata <- read.table("Example_Data.csv",header=T,sep=",") #2 columns with headers; example data is mulloway (see Smith & Taylor 2014) x <- mydata[,1] #Size classes y_raw <- mydata[,2] #Relative number (proportion, etc.) of fish caught in each size class y <- (y_raw/sum(y_raw))/max((y_raw/sum(y_raw))) #Standardised selection (range from 0-1) ##ENTER data for peak-logistic (these are parameter starting values for the algorithm; these must be close to the final values; change these if error during nls()) plot(x,y) #evaluate plot to generate approximate staring values for peak-logistic parameters dL_1 = 200 #delta length 1 (length range between 1% and 99% increasing catchability 1) dL_2 = 300 #delta length 2 (length range between 1% and 99% decreasing catchability 2) L50_1 = 500 #length at 50% selection 1 L50_2 = 700 #length at 50% selection 2 a_1 = 1 #maximum relative selection 1 a_2 = 0.5 #maximum relative selection 2 upperlim <- c(500, 600, 600, 900, 2, 2) #Upper bounds for parameters - use these to avoid unrealistic curve shapes lowerlim <- c(100, 100, 300, 600, 0.3, 0.3) #Lower bounds for parameters - use these to avoid unrealistic curve shapes ##PLOT best guess for peak-logistic; use to generate better parameter starting values yy <- a_1/(1+exp(-(log(99^2)/dL_1)*(x-L50_1))) - a_2/(1+exp(-(log(99^2)/dL_2)*(x-L50_2))) lines(x,yy) ##FIT 5-parameter peak-logistic (a_1 = 1) nlfit1 <- nls(y ~ 1/(1+exp(-(log(99^2)/dL_1)*(x-L50_1))) - a_2/(1+exp(-(log(99^2)/dL_2)*(x-L50_2))), data = mydata, start = list (dL_1=dL_1,dL_2=dL_2,L50_1=L50_1,L50_2=L50_2,a_2=1), algorithm = "port", lower = lowerlim, upper = upperlim) summary(nlfit1) #Prints the parameter estimates #summary(nlfit1,correlation=TRUE) #use to evaluate correlations between parameters n <- nrow(mydata) P1 <- coef(nlfit1) SSR1 = sum(residuals(nlfit1)^2); SSR1 #Sum of the squared residuals MSE1 = SSR1/(n-5); MSE1 #The mean square error - use this to compare different curves Rsq1 = 1-(sum(residuals(nlfit1)^2)/sum((y-mean(y))^2)); Rsq1 #R-squared value; be aware of the limits of Rsq for non-linear models (Kvalseth 1985, "Cautionary note about R2". Amer. Statist. 39) if ((0.5*P1[1]+P1[3]) > (0.5*P1[2]+P1[4])) { print("WARNING - logistic curves overlapping unrealistically: change upper and lower limits to constrain algorithm") } #a rule of thumb to avoid unrealistic solutions: L50_1+0.5*dL_1 < L50_2+0.5*dL_2 ##FIT 6-parameter peak-logistic (a_1 unfixed) nlfit2 <- nls(y ~ a_1/(1+exp(-(log(99^2)/dL_1)*(x-L50_1))) - a_2/(1+exp(-(log(99^2)/dL_2)*(x-L50_2))), data = mydata, start = list (dL_1=dL_1,dL_2=dL_2,L50_1=L50_1,L50_2=L50_2,a_1=a_1,a_2=a_2), algorithm = "port", lower = lowerlim, upper = upperlim) summary(nlfit2) #Prints the parameter estimates P2 <- coef(nlfit2) SSR2 = sum(residuals(nlfit2)^2); SSR2 #Sum of the squared residuals MSE2 = SSR2/(n-6); MSE2 #The mean square error - use this to compare different curves Rsq2 = 1-(sum(residuals(nlfit2)^2)/sum((y-mean(y))^2)); Rsq2 if ((0.5*P2[1]+P2[3]) > (0.5*P2[2]+P2[4])) { print("WARNING - logistic curves overlapping unrealistically: change upper and lower limits to constrain algorithm") } #a rule of thumb to avoid unrealistic solutions: L50_1+0.5*dL_1 < L50_2+0.5*dL_2 ##PLOT the two peak-logistic functions plot(x,y) names <- c("peaklogistic 5par","peaklogistic 6par","lognormal","assymetric normal","binormal") legend(min(x),max(y),names,lty=c(1,1,1,1,1),lwd=c(2,2,2,2,2),col=c("black","blue","green","red","purple"),cex=0.8) xx <- seq(min(x),max(x),by=((max(x)-min(x))/100)) y1 <- 1/(1+exp(-(log(99^2)/P1[1])*(xx-P1[3]))) - P1[5]/(1+exp(-(log(99^2)/P1[2])*(xx-P1[4]))) lines(xx,y1) #5 parameter peak logistic y2 <- P2[5]/(1+exp(-(log(99^2)/P2[1])*(xx-P2[3]))) - P2[6]/(1+exp(-(log(99^2)/P2[2])*(xx-P2[4]))) lines(xx,y2,col="blue") #6 parameter peak logistic ##FIT log-normal for comparison xl <- fitdistr(x, "lognormal") #estimate starting values for nls mu1 <- as.numeric(xl$estimate[1]) sigma1 <- as.numeric(xl$estimate[2]) sf <- 100; #a scaling factor used to normalise the lognormal y axis nlfit3 <- nls(y ~ (sf/(x*(sqrt(2*pi*(sig1^2)))))*(exp(-1*((log(x)-mu1)^2)/(2*sig1^2))), data = mydata, start = list (mu1=mu1,sig1=sigma1,sf=sf), algorithm = "port") summary(nlfit3) P3 <- coef(nlfit3) SSR3 = sum(residuals(nlfit3)^2); SSR3 #Sum of the squared residuals MSE3 = SSR3/(n-3); MSE3 #The mean square error - use this to compare different curves Rsq3 = 1-(sum(residuals(nlfit3)^2)/sum((y-mean(y))^2)); Rsq3 y3 <- (P3[3]/(xx*(sqrt(2*pi*(P3[2]^2)))))*(exp(-1*((log(xx)-P3[1])^2)/(2*P3[2]^2))) lines(xx,y3,col="green") ##FIT two-sided normal for comparison xn <- fitdistr(x, "normal") #estimate starting values for nls mu2 <- as.numeric(xn$estimate[1]) sigma2 <- as.numeric(xn$estimate[2]) a_n <- 0 #the non-zero asymptote (between 0-1) for the RHS of the assymetric normal curve lowerlim2 <- c(mu2/10, sigma2/10, sigma2/10, 0) #lower limits; specify that a_n must be > 0 upperlim2 <- c(mu2*10, sigma2*10, sigma2*10, 1) nlfit4 <- nls(formula = y ~ ifelse(test = x <= mu2, ye = exp(-0.5*((x-mu2)/sig2)^2), no = exp(-0.5*((x-mu2)/sig3)^2)*(1-a_n)+a_n), data = mydata, start = list (mu2=mu2,sig2=sigma2,sig3=sigma2, a_n=a_n), algorithm = "port", lower = lowerlim2, upper = upperlim2) summary(nlfit4) P4 <- coef(nlfit4) SSR4 = sum(residuals(nlfit4)^2); SSR4 #Sum of the squared residuals MSE4 = SSR4/(n-4); MSE4 #The mean square error - use this to compare different curves Rsq4 = 1-(sum(residuals(nlfit4)^2)/sum((y-mean(y))^2)); Rsq4 y4 <- ifelse(test = xx <= P4[1], ye = exp(-0.5*((xx-P4[1])/P4[2])^2), no = exp(-0.5*((xx-P4[1])/P4[3])^2)*(1-P4[4])+P4[4]) lines(xx,y4,col="red") ##IF FITTING ERROR, plot best guess for two-sided normal to generate better starting estimates yyy <- ifelse(test = x <= mu2, ye = exp(-0.5*((x-mu2)/sigma2)^2), no = exp(-0.5*((x-mu2)/sigma2)^2)*(1-a_n)+a_n) plot(x,y) lines(x,yyy) ##ENTER data for binormal plot(x,y) #evaluate plot to generate approximate staring values for binormal parameters mu3 = 600 #length at maximum catchability in first normal curve mu4 = 900 #length at maximum catchability in second normal curve sigma4 = 70 #width of first normal curve (standard deviation) sigma5 = 70 #width of second normal curve (standard deviation) delta = 1 #maximum height of first normal curve (if 1, scales to maximum of 1) w = 0.5 #maximum relative height of second normal curve lowerlim3 <- c(mu3/10, mu4/10, sigma4/10, sigma5/10, 0, 0) #lower limits; all parameters must be > 0 upperlim3 <- c(mu3*10, mu4*10, sigma4*10, sigma5*10, 2.5, 1) ##Plot best guess for binormal to generate better starting estimates yyyy <- (1/delta)*(exp(-1*((x-mu3)^2)/(2*sigma4^2))+w*exp(-1*((x-mu4)^2)/(2*sigma5^2))) plot(x,y) lines(x,yyyy) ##FIT binormal for comparison nlfit5 <- nls(formula = y ~ (1/del)*(exp(-1*((x-mu3)^2)/(2*sig4^2))+w*exp(-1*((x-mu4)^2)/(2*sig5^2))), data = mydata, start = list (mu3=mu3,mu4=mu4,sig4=sigma4,sig5=sigma5,del=delta,w=w), algorithm = "port", lower = lowerlim3, upper = upperlim3) summary(nlfit5) P5 <- coef(nlfit5) SSR5 = sum(residuals(nlfit5)^2); SSR5 #Sum of the squared residuals MSE5 = SSR5/(n-6); MSE5 #The mean square error - use this to compare different curves Rsq5 = 1-(sum(residuals(nlfit5)^2)/sum((y-mean(y))^2)); Rsq5 y5 <- (1/P5[5])*(exp(-1*((xx-P5[1])^2)/(2*P5[3]^2))+P5[6]*exp(-1*((xx-P5[2])^2)/(2*P5[4]^2))) lines(xx,y5,col="purple") ##KEY RESULTS; lowest MSE is usually best SSR <- c(SSR1,SSR2,SSR3,SSR4,SSR5) MSE <- c(MSE1,MSE2,MSE3,MSE4,MSE5) Results <- cbind(names,SSR,MSE); Results