# Script by Carmen Segura to reproduce the main results and Figures 3 and 4 in: C. Segura, A.L. Neal, L. Castro-Sardiňa, P. Harris, M.J. Rivero, L.M. Cardenas, J.G.N. Irisarri, Comparison of direct and indirect soil organic carbon prediction at farm field scale, Journal of Environmental Management, Volume 365, 2024, 121573, ISSN 0301-4797, https://doi.org/10.1016/j.jenvman.2024.121573. # load packages library(dplyr) library(ggplot2) library(patchwork) library(Hmisc) library(glmulti) library(lme4) library(lmerTest) library(MuMIn) library(mgcv) library(ggResidpanel) library(relaimpo) library(sjPlot) library(flextable) library(ggeffects) library(gratia) #### 1. Read data #### # setup your directory setwd("yourdirectory") # read data .txt ESPI.SOC<-read.table("data_source_Segura.txt", header=TRUE, stringsAsFactors=TRUE, sep="\t", na.strings="NA", dec=".", strip.white=TRUE) # IMPORTANT: we have named some variables in the manuscript different from the .txt and from this code. # variable in code and txt = variable in the manuscript # MEAN.TC = SOC # ESPI.MAR.OCT = ESPI # TEMP.MAR.OCT = TEMP # PPT.MAR.OCT = PPT # plough.manag = management # mow.early = mow.spring # mow.late = mow.summer #### 2. Prepare data #### # Our data: new data frame when ESPI count proportion is > 0.8 and yr > 2011 (before 2012 we do not have soil data) ESPI<-filter(ESPI.SOC,count.Prop.MAR.OCT.> 0.80 & yr >2011) # check structure str(ESPI) ESPI$yr<-as.factor(ESPI$yr) ESPI$yrs.without<-as.numeric(ESPI$yrs.without ) ESPI$mow.late <-as.numeric(ESPI$mow.late) ESPI$mow.early <-as.numeric(ESPI$mow.early) # NAs ESPI[ESPI==""]<-NA #### 3. Modelling using a set of open- and non-open-source variables (Supplementary material section C) #### # Explanatory variables: ESPI and all variables available from the NWFP related to land use and management, landscape, soils and climate #### 3.1. Lineal model #### # glmulti to fit best models, example with the definitive set of variables (see Supplementary material section C) lm.A_all<-glmulti(MEAN.TC~ESPI.MAR.OCT+ yr + plough.manag + grazing + veg.cover+ fert+ mow.early+ mow.late+ aspect.mean+ slope.mean+ soil.units+ TN+ pH+ TEMP.MAR.OCT+ PPT.MAR.OCT, data=ESPI, crit = aicc, level = 1, method = "h", family = gaussian, fitfunction = lm, confsetsize = 100) plot(lm.A_all) print(lm.A_all) plot(lm.A_all, type = "s") # Information Criteria and the Akaike weights of our best models weightable(lm.A_all)[1:6,] %>% regulartable() %>% autofit() # Best linear model after checking all the models selected by glmulti (the same processess aply over the modelling) # final.lm.all<-lm(MEAN.TC ~ plough.manag + soil.units + mow.late + TN + TEMP.MAR.OCT, data=ESPI) summary(final.lm.all) summary.aov(final.lm.all) aicc(final.lm.all) #diagnostic plots resid_panel(final.lm.all, plots = c("resid","qq","hist","lev"),type = NA, bins = 30, smoother = FALSE, qqline = TRUE, qqbands = FALSE, scale = 1, theme = "bw", axis.text.size = 12, title.text.size = 12, title.opt = TRUE, nrow = NULL) # relative importance of factors calc.relimp(final.lm.all, type = c("lmg"), rela = TRUE) # Further exploration to detect non linear relationships set_theme(base = theme_bw()) #to set ggplot theme in plot_model sjPlot plot_model(final.lm.all, type = "resid") # Slope of coefficients for each single predictor against the residuals plot_model(final.lm.all, type = "slope") # Slope of coefficients for each single predictor against the response # model validation #get predicted and observed values pred_lm.all <- as.data.frame(predict.lm(final.lm.all, ESPI, se.fit=TRUE, type="response")) #fitted values pred_lm.all$obs <- ESPI$MEAN.TC #observed values pred_lm.all$manag <- ESPI$plough.manag #to plot observed values grouped by management pred_lm.all%>% na.omit() pred_lm.all<-filter(pred_lm.all,!obs=="na") str(pred_lm.all) # predicted vs observed and r with(pred_lm.all,cor(obs, fit, method = c("pearson"))) # plot measured vs predicted grouped by management lm.pred.vs.obs.manag <- ggplot(pred_lm.all, aes(x=obs, y=fit, color=manag)) + geom_point(shape=20, size = 3, stroke = 1) + geom_abline(intercept=0, slope=1) + ylab("SOC predicted")+ scale_y_continuous(limits = c(2, 7)) + scale_x_continuous(limits = c(2, 7))+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ geom_smooth(method=lm , color="blue", fill="#69b3a2", se=TRUE) + theme_bw()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_blank(), legend.text = element_text(size = 12), legend.background = element_rect(fill = "transparent")) + geom_segment(aes(x = 6.3, y = 2, xend = 6.7, yend = 2),colour="black") + annotate(geom="text", x=6.95, y=2.03, label="1:1", size=6) lm.pred.vs.obs.manag # RMSE: to quantify prediction error, root mean squared error is the most commonly used measure of fit to test data # function rmse2 <- function(x, y, na.rm = TRUE){ res <- sqrt(mean((x-y)^2, na.rm = na.rm)) return(res) } # call to function rmse2(x = pred_lm.all$obs, y = pred_lm.all$fit) # RMSE = 0.16 % SOC #to report % RMSE mean.obser<-mean(pred_lm.all$obs) mean.obser rmse<-(0.1633583*(100/mean.obser)) rmse # RMSE(%)=3.52 ## Code example to reproduce LM predictions plot pred.TN <- ggpredict(final.lm.all, terms = c("TN","plough.manag")) plot.TN<-plot(pred.TN, add.data = TRUE ) + labs(x="TN (%)",y="SOC (%)", title="a")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ coord_cartesian(ylim=c(2, 8)) + scale_y_continuous(breaks=seq(2, 8,1))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.25, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent")) pred.manag <- ggpredict(final.lm.all, terms = c("plough.manag")) plot.manag<-plot(pred.manag, add.data = TRUE, colors = "darkgrey") + labs(x="Management",y="SOC (%)", title= "b")+ theme_classic()+ scale_x_discrete(limits=c("arable","improved p.","permanent p."))+ coord_cartesian(ylim=c(2, 8)) + scale_y_continuous(breaks=seq(2, 8,1))+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent")) pred.soil <- ggpredict(final.lm.all, terms = c("soil.units","plough.manag")) plot.soil<-plot(pred.soil, add.data = TRUE) + labs(x="Soil units",y="SOC (%)", title="c")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ coord_cartesian(ylim=c(2, 8)) + scale_y_continuous(breaks=seq(2, 8,1))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.35, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent")) pred.TEMP <- ggpredict(final.lm.all, terms = c("TEMP.MAR.OCT","plough.manag")) plot.TEMP<-plot(pred.TEMP, add.data = TRUE) + labs(x="TEMP (°C)",y="SOC (%)", title="d")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ scale_y_continuous(breaks=(seq(2,8,1)), limits = c(2,8)) + #when ylimits doesn't work include limi in scale coord_cartesian(xlim=c(11.5, 13.5)) + scale_x_continuous(breaks=seq(11.5, 13.5,0.5))+ theme_classic() pred.mow <- ggpredict(final.lm.all, terms = c("mow.late","plough.manag")) plot.mow<-plot(pred.mow, add.data = TRUE ) + labs(x="Mow summer events",y="SOC (%)", title="e")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ #coord_cartesian(ylim=c(2, 8)) + scale_y_continuous(breaks=(seq(2, 8,1)),limits =c(2, 8) )+ coord_cartesian(xlim=c(0, 2)) + scale_x_continuous(breaks=seq(0, 2 ,1))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.85, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent")) (plot.TN | plot.manag | plot.soil + theme(legend.position="none")|plot.TEMP + theme(legend.position="none") | plot.mow + theme(legend.position="none") ) #ordered by relative importance #### 3.2. Linear mixed model (Field as random factor) #### final.lmm.all<-lmer(MEAN.TC ~plough.manag + soil.units + mow.late + TN+ TEMP.MAR.OCT+ (1|Field.Name),data=ESPI) summary(final.lmm.all) anova(final.lmm.all) # we use F as proxy relative importance aicc(final.lmm.all) # diagnostic plots plot_model(final.lmm.all, type = "diag") plot_model(final.lmm.all, type = "re", show.values = F) # to visualize random effects # explained variability by random factor r.squaredGLMM(final.lmm.all) # Follow the examples in section 3.1 to reproduce LMM predictions plot and get RMSE #### 3.3. Generalised additive model #### final.gam.all<-gam(MEAN.TC ~plough.manag + soil.units + ESPI.MAR.OCT + TN + s(TEMP.MAR.OCT,k=6) , method = "REML",data=ESPI) summary(final.gam.all) anova(final.gam.all) aiccc(final.gam.all) # diagnostic plots set.seed(100) par(mfrow=c(2,2)) appraise(final.gam.all, method = "simulate") & theme_bw() concurvity(final.gam.all, full = TRUE) concurvity(final.gam.all, full = FALSE) # model without ESPI? final.gam.all.b<-gam(MEAN.TC ~plough.manag + soil.units + TN + s(TEMP.MAR.OCT,k=6) , method = "REML",data=ESPI) summary(final.gam.all.b) aicc(final.gam.all.b) # diagnostic plots set.seed(100) par(mfrow=c(2,2)) appraise(final.gam.all.b, method = "simulate") & theme_bw() concurvity(final.gam.all.b, full = TRUE) concurvity(final.gam.all.b, full = FALSE) # which is a best model? anova(final.gam.all.b,final.gam.all, test="F") # including ESPI does not improve the explanatory power of the model but p=0.0538, model diagnosis seems better with ESPI # Partial effects of the selected model tn.gam<-draw(parametric_effects(final.gam.all,terms = "TN")) & labs(title= "") & theme_bw() manag.gam<-draw(parametric_effects(final.gam.all,terms = "plough.manag")) & labs(x="Management",y="Partial effect",title= "") & theme_bw() soil.gam<-draw(parametric_effects(final.gam.all,terms = "soil.units")) & labs(x="Soil units",y="Partial effect",title= "") & theme_bw() ESPI.gam<-draw(parametric_effects(final.gam.all,terms = "ESPI.MAR.OCT")) & labs(x="ESPI",y="Partial effect",title= "") & theme_bw() sTEMP<-draw(final.gam.all,residuals=TRUE,resid_col = "grey") & labs(x="TEMP",y="Partial effect",title= "s(TEMP)") & theme_bw() tn.gam + manag.gam + soil.gam + ESPI.gam + sTEMP + plot_layout(ncol = 2, nrow=3) # model validation: measured vs predicted pred_gam.all <- as.data.frame(predict.gam(final.gam.all, ESPI, se.fit=TRUE, type="response")) #fit values pred_gam.all$obs <- ESPI$MEAN.TC #observed values pred_gam.all$manag <- ESPI$plough.manag pred_gam.all pred_gam.all%>% na.omit() pred_gam.all<-filter(pred_gam.all,!obs=="na") str(pred_gam.all) #coefficient of determination r with(pred_gam.all,cor(obs, fit, method = c("pearson"))) #measured vs predicted grouped by management gam.pred.vs.obs.manag <- ggplot(pred_gam.all, aes(x=obs, y=fit, color=manag)) + geom_point(shape=20, size = 3, stroke = 1) + geom_abline(intercept=0, slope=1) + xlab("SOC measured") + ylab("SOC fitted")+ scale_y_continuous(limits = c(2, 7)) + scale_x_continuous(limits = c(2, 7))+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) + theme_bw()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_rect(fill = NA), legend.text = element_text(size = 12), legend.background = element_rect(fill = "transparent"))+ geom_segment(aes(x = 6.3, y = 2, xend = 6.7, yend = 2),colour="black") + annotate(geom="text", x=6.95, y=2.03, label="1:1", size=6) # RMSE # function rmse2 <- function(x, y, na.rm = TRUE){ res <- sqrt(mean((x-y)^2, na.rm = na.rm)) return(res) } # call to function rmse2(x = pred_gam.all$obs, y = pred_gam.all$fit) #0.1325251 #with eq to report as % mean.obser<-mean(pred_gam.all$obs) mean.obser rmse<-(0.1325251*(100/mean.obser)) rmse #2.855949 # Cross validation for final.gam.all model (based on http://www.sthda.com/english/articles/38-regression-model-validation/157-cross-validation-essentials-in-r/ ) scores <- data.frame() #Create empty df to store CV R2 values devian.expl<-data.frame() set.seed(40) #Set seed for reproducibility k <- 10 #Define number of folds # For loop for each fold for (i in 1:k) { # Create weighting column. ESPI$weight <- sample(c(0,1),size=nrow(ESPI),replace=TRUE,prob=c(0.2,0.8)) #0 Indicates testing sample, 1 training sample. Split into training and test sample (80/20 ratio) ESPI$weight # Generate training dataset trainingdata <- ESPI[ESPI$weight == 1, ] #Select test data by weight trainingdata%>% na.omit() # Generate test dataset testdata <- ESPI[ESPI$weight == 0, ] #Select test data by weight testdata%>% na.omit() # Run GAM with training data m.training.nonopen<-gam(MEAN.TC ~plough.manag + soil.units +ESPI.MAR.OCT + TN + s(TEMP.MAR.OCT,k=6),method = "REML",data = trainingdata) summary(m.training.nonopen) # Predict using test data pred <- predict.gam(m.training.nonopen,newdata = testdata) pred #Extract scores scores[i,1] <- summary(m.training.nonopen)$r.sq #this is the adjusted because gam summary doesn't provide R-sq devian.expl[i,1]<- summary(m.training.nonopen)$dev.expl } scores devian.expl #Get average scores from each k-fold test av.r2.GMRF <- mean(scores$V1) av.r2.GMRF av.dev.GMRF <- mean(devian.expl$V1) av.dev.GMRF # print scores folds<-c(1,2,3,4,5,6,7,8,9,10) scores2 <- cbind(scores, folds) dev<-cbind(devian.expl,folds) # plot cv.nonopen.r<-ggplot(data=scores2, aes(x=folds, y=V1, group=1)) + geom_boxplot(color="#00b2ca") + geom_point(size=4, color="#00b2ca") + labs(y=expression(Radj^2),x="folds")+ scale_y_continuous(breaks=seq(0.97,1.00,0.005))+ scale_x_continuous(breaks=seq(1, 10,1))+ annotate(geom="text", x=8, y=0.99, label= expression(model~Radj^2~0.98), size=4)+ theme_classic() # Code to reproduce GAM predictions plot pred.TN.gam <- ggpredict(final.gam.all, terms = c("TN","plough.manag")) plot.TN.gam<-plot(pred.TN.gam, add.data = TRUE ) + labs(x="TN (%)",y="SOC (%)", title="a")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ coord_cartesian(ylim=c(2, 8)) + scale_y_continuous(breaks=seq(2, 8,1))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.25, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=0.7, y=8, label="F=1556.9", size=4) pred.TEMP.gam <- ggpredict(final.gam.all, terms = c("TEMP.MAR.OCT","plough.manag")) plot.TEMP.gam<-plot(pred.TEMP.gam, add.data = TRUE) + theme_bw() + labs(x="TEMP (°C)",y="SOC (%)", title="b")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ scale_y_continuous(breaks =(seq(2, 8,1)),limits =c(2, 8))+ coord_cartesian(xlim=c(11.5, 13.3)) + scale_x_continuous(breaks=seq(11.5, 13.3,0.3))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.45, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=12.9, y=7.6, label="edf = 3.1", size=4)+ annotate(geom="text", x=12.9, y=8, label="F=67.8", size=4) pred.soil.gam <- ggpredict(final.gam.all, terms = c("soil.units","plough.manag")) plot.soil.gam<-plot(pred.soil.gam, add.data = TRUE) + labs(x="Soil units",y="SOC (%)", title="c")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ coord_cartesian(ylim=c(2, 8)) + scale_y_continuous(breaks=seq(2, 8,1))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.35, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=5, y=8, label="F=15.7", size=4) pred.manag.gam <- ggpredict(final.gam.all, terms = c("plough.manag")) plot.manag.gam<-plot(pred.manag.gam, add.data = TRUE, colors = "darkgrey") + labs(x="Management",y="SOC (%)", title= "d")+ theme_classic()+ scale_x_discrete(limits=c("arable","improved p.","permanent p."))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ coord_cartesian(ylim=c(2, 8)) + scale_y_continuous(breaks=seq(2, 8,1))+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=3, y=8, label="F=7.0", size=4) #p=0.0015 pred.ESPI.gam <- ggpredict(final.gam.all, terms = c("ESPI.MAR.OCT","plough.manag")) plot.ESPI.gam<-plot(pred.ESPI.gam, add.data = TRUE) + theme_bw() + labs(x=expression(ESPI~(MJ~m^-2 ~d^-1)),y="SOC (%)", title="e") + scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ scale_y_continuous(breaks =(seq(2, 8,1)),limits =c(2, 8))+ coord_cartesian(xlim=c(0, 5)) + scale_x_continuous(breaks=seq(0, 5,1))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.45, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=4, y=8, label="F=3.6", size=4) plot.ESPI.gam (plot.TN.gam |plot.TEMP.gam + theme(legend.position="none") | plot.soil.gam + theme(legend.position="none")|plot.manag.gam + theme(legend.position="none") | plot.ESPI.gam + theme(legend.position="none")) #ordered by relative importance #### 3.4. Generalised additive mixed model (Field as random factor) #### final.gamm.all<-gam(MEAN.TC ~plough.manag + soil.units +ESPI.MAR.OCT + TN + s(TEMP.MAR.OCT,k=6) + s(Field.Name, bs="re") , method = "REML",data=ESPI) summary(final.gamm.all) aicc(final.gamm.all) # diagnostic plot par(mfrow=c(2,2)) gam.check(final.gamm.all) appraise(final.gamm.all, method = "simulate") & theme_bw() concurvity(final.gamm.all, full = TRUE) #concurvity issues when add random factor is a common problem when you have a covariate as a random factor, and you have other factors that are function of space concurvity(final.gamm.all, full = FALSE) # Partial effects tn.gamm<-draw(parametric_effects(final.gamm.all,terms = "TN")) & labs(title= "") & theme_bw() manag.gamm<-draw(parametric_effects(final.gamm.all,terms = "plough.manag")) & labs(x="Management",y="Partial effect",title= "") & theme_bw() soil.gamm<-draw(parametric_effects(final.gamm.all,terms = "soil.units")) & labs(x="Soil units",y="Partial effect",title= "") & theme_bw() ESPI.gamm<-draw(parametric_effects(final.gam.all,terms = "ESPI.MAR.OCT")) & labs(x="ESPI",y="Partial effect",title= "") & theme_bw() sterms<-draw(final.gamm.all,residuals=TRUE,resid_col = "grey") & theme_bw() sterms<-draw(final.gamm.all,residuals=TRUE,resid_col = "grey",terms = "TEMP.MAR.OCT") & labs(x="TEMP",y="Partial effect",title= "") & theme_bw() tn.gamm + manag.gamm + soil.gamm + ESPI.gamm + sterms + plot_layout(ncol = 2, nrow=3) # model validation: get predicted and observed values, r, RMSE and prediction plots following the script 3.1 as example #### 4. Modelling using a set of open- and non-open-source variables excluding TN (Supplementary material section D) #### #### 4.1. Linear model #### # Best model final.lm.allb<-lm(MEAN.TC ~ plough.manag + soil.units + ESPI.MAR.OCT + aspect.mean + TEMP.MAR.OCT, data=ESPI) #15/03/2023 LM selected model without TN was lmD.3, we call it final.lm.allb summary(final.lm.allb) summary.aov(final.lm.allb) # diagnostic plots resid_panel(final.lm.allb, plots = c("resid","qq","hist","lev"),type = NA, bins = 30, smoother = FALSE, qqline = TRUE, qqbands = FALSE, scale = 1, theme = "bw", axis.text.size = 12, title.text.size = 12, title.opt = TRUE, nrow = NULL) # relative importance of variables calc.relimp(final.lm.allb, type = c("lmg"), rela = TRUE) # further exploration set_theme(base = theme_bw()) plot_model(final.lm.allb, type = "resid") plot_model(final.lm.allb, type = "slope") # model validation: get predicted and observed values, r, RMSE and prediction plots following the script 3.1 as example #### 4.2. Linear mixed model (Field as random factor) #### # Best model final.lmm.allb<-lmer(MEAN.TC ~plough.manag + soil.units + ESPI.MAR.OCT + TEMP.MAR.OCT+ (1|Field.Name),data=ESPI) summary.aov(final.lmm.allb) # diagnostic plots and further exploration set_theme(base = theme_bw()) #to theme bw as other plots plot_model(final.lmm.allb, type = "diag") plot_model(final.lmm.allb, type = "re", show.values = F) #random effects # explained variability by random factor with library(MuMIn) r.squaredGLMM(final.lmm.allb) # model validation: get predicted and observed values, r, RMSE and prediction plots following the script 3.1 as example #### 4.3. Generalised additive model #### # GAM: we have two alternatives for the final model that are equivalents # GAM 1: with TEMP final1.gam.allb<-gam(MEAN.TC ~plough.manag + soil.units + s(aspect.mean,k=6) + s(TEMP.MAR.OCT,k=6), method = "REML",data=ESPI) summary(final1.gam.allb) anova(final1.gam.allb) # diagnostic plots par(mfrow=c(2,2)) gam.check(final1.gam.allb) DIAGNgamb.1<-appraise(final1.gam.allb, method = "simulate") & theme_bw() # Partial effects manag.gam.b.1<-draw(parametric_effects(final1.gam.allb,terms = "plough.manag")) & labs(x="Management",y="Partial effect",title= "") & theme_bw() soil.gam.b.1<-draw(parametric_effects(final1.gam.allb,terms = "soil.units")) & labs(x="Soil units",y="Partial effect",title= "") & theme_bw() aspect.gam.b.1<-draw(final1.gam.allb,terms = "aspect.mean") & labs(x="Aspect",y="Partial effect",title= "") & theme_bw() temp.gam.b.1<-draw(final1.gam.allb,terms = "TEMP.MAR.OCT") & labs(x="TEMP",y="Partial effect",title= "") & theme_bw() s.asp.temp.final1.b<-draw(final1.gam.allb,residuals=TRUE,resid_col = "grey") & theme_bw() manag.gam.b.1 + soil.gam.b.1 + s.asp.temp.final1.b + plot_layout(ncol = 1, nrow=3) # model validation: get predicted values vs measured pred_gam.allb.1 <- as.data.frame(predict.gam(final1.gam.allb, ESPI, se.fit=TRUE, type="response")) pred_gam.allb.1$obs <- ESPI$MEAN.TC pred_gam.allb.1$manag <- ESPI$plough.manag pred_gam.allb.1 pred_gam.allb.1%>% na.omit() pred_gam.allb.1<-filter(pred_gam.allb.1,!obs=="na") # coefficient of determination with(pred_gam.allb.1,cor(obs, fit, method = c("pearson"))) # measured vs predicted grouped by management gam.pred.vs.obs.managb1 <- ggplot(pred_gam.allb.1, aes(x=obs, y=fit, color=manag)) + geom_point(shape=20, size = 3, stroke = 1) + geom_abline(intercept=0, slope=1) + xlab("SOC measured") + ylab("SOC fitted")+ scale_y_continuous(limits = c(2, 7)) + scale_x_continuous(limits = c(2, 7))+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) + theme_bw()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_rect(fill = NA), legend.text = element_text(size = 12), legend.background = element_rect(fill = "transparent"))+ geom_segment(aes(x = 6.3, y = 2, xend = 6.7, yend = 2),colour="black") + annotate(geom="text", x=6.95, y=2.03, label="1:1", size=6) gam.pred.vs.obs.managb1 #cross validation CVgam(formula=MEAN.TC ~plough.manag + soil.units + s(aspect.mean,k=6) + s(TEMP.MAR.OCT,k=6), data=ESPI, nfold = 10, debug.level = 0, method = "REML", printit = TRUE, cvparts = NULL, gamma = 1, seed = 100) scores <- data.frame() devian.expl<-data.frame() set.seed(40) k <- 10 # For loop for each fold for (i in 1:k) { # Create weighting column. ESPI$weight <- sample(c(0,1),size=nrow(ESPI),replace=TRUE,prob=c(0.2,0.8)) # Generate training dataseT trainingdata <- ESPI[ESPI$weight == 1, ] # Generate test dataset testdata <- ESPI[ESPI$weight == 0, ] # Run GAM with training data m.training.nonopenb1<-gam(MEAN.TC ~plough.manag + soil.units + s(aspect.mean,k=6) + s(TEMP.MAR.OCT,k=6),method = "REML",data = trainingdata) summary(m.training.nonopenb1) # Predict using test data pred <- predict.gam(m.training.nonopenb1,newdata = testdata) # Extract scores[i,1] <- summary(m.training.nonopenb1)$r.sq devian.expl[i,1]<- summary(m.training.nonopenb1)$dev.expl } scores devian.expl # Get average scores from each k-fold test av.r2.GMRF <- mean(scores$V1) #0.8205425 av.r2.GMRF av.dev.GMRF <- mean(devian.expl$V1) #0.8494907 av.dev.GMRF # print scores folds<-c(1,2,3,4,5,6,7,8,9,10) scores2 <- cbind(scores, folds) dev<-cbind(devian.expl,folds) cv.nonopenb1.r<-ggplot(data=scores2, aes(x=folds, y=V1, group=1)) + geom_boxplot(color="#00b2ca") + geom_point() + ylab('R-sq.(adj)')+ scale_y_continuous(breaks=seq(0.80,0.85,0.01))+ scale_x_continuous(breaks=seq(1, 10,1))+ annotate(geom="text", x=8, y=0.85, label="model R-sq.(adj)=0.82", size=4)+ theme_classic() cv.nonopenb1.r #### Reproduce Figure 3 #### pred.manag.gamb.1 <- ggpredict(final1.gam.allb, terms = c("plough.manag")) plot.manag.gamb.1<-plot(pred.manag.gamb.1, add.data = TRUE, colors = "darkgrey") + labs(x="Management",y="SOC (%)", title= "a")+ theme_classic()+ scale_x_discrete(limits=c("arable","improved p.","permanent p."))+ scale_y_continuous(limits = c(0, 8)) + theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=3, y=8, label="F=117.4", size=4) pred.TEMP.gamb.1 <- ggpredict(final1.gam.allb, terms = c("TEMP.MAR.OCT","plough.manag")) plot.TEMP.gamb.1<-plot(pred.TEMP.gamb.1, add.data = TRUE) + labs(x="TEMP (°C)",y="SOC (%)", title="b")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ scale_y_continuous(limits = c(0, 8)) + coord_cartesian(xlim=c(11.5, 13.3)) + scale_x_continuous(breaks=seq(11.5, 13.3,0.3))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.25, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=13, y=8, label="F=28.0", size=4)+ #, p< 2e-16 annotate(geom="text", x=13, y=7.45, label="edf=4.4", size=4) pred.soil.gamb.1 <- ggpredict(final1.gam.allb, terms = c("soil.units","plough.manag")) plot.soil.gamb.1<-plot(pred.soil.gamb.1, add.data = TRUE) + labs(x="Soil units",y="SOC (%)", title="c")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_y_continuous(limits = c(0, 8)) + theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.25, 1), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=5.3, y=8, label="F=9.2", size=4) pred.asp.gamb.1 <- ggpredict(final1.gam.allb, terms = c("aspect.mean","plough.manag")) plot.asp.gamb.1<-plot(pred.asp.gamb.1, add.data = TRUE) + labs(x="Aspect (°)",y="SOC (%)", title="d")+ scale_color_manual(values = c("#E69F00","#AA4499","#44AA99"),labels = c("arable", "improved pasture", "permanent pasture"))+ scale_fill_manual(values = c("#E69F00","#AA4499","#44AA99"))+ scale_y_continuous(limits = c(0, 8)) + coord_cartesian(xlim=c(67.5, 337.5)) + scale_x_continuous(breaks=seq(67.5, 337.5,45))+ theme_classic()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.45, .91), legend.key = element_blank(), legend.text = element_text(size = 8), legend.background = element_rect(fill = "transparent"))+ annotate(geom="text", x=270, y=7.45, label="edf=4.1", size=4)+ annotate(geom="text", x=270, y=8, label="F=8.0", size=4)+ annotate(geom="text", x=90, y=0, label="E", size=4)+ annotate(geom="text", x=135, y=0, label="SE", size=4)+ annotate(geom="text", x=180, y=0, label="S", size=4)+ annotate(geom="text", x=225, y=0, label="SW", size=4)+ annotate(geom="text", x=270, y=0, label="W", size=4)+ annotate(geom="text", x=315, y=0, label="NW", size=4) (plot.manag.gamb.1 | plot.TEMP.gamb.1 | plot.soil.gamb.1 + theme(legend.position="none") | plot.asp.gamb.1 + theme(legend.position="none")) # GAM final 2: with PPT (in Supplementary material) final2.gam.allb<-gam(MEAN.TC ~plough.manag + soil.units + s(aspect.mean,k=6) + s(PPT.MAR.OCT,k=6), method = "REML",data=ESPI) summary(final2.gam.allb) anova(final2.gam.allb) AICc(final2.gam.allb) # diagnostic plot par(mfrow=c(2,2)) DIAGNgamb.2<-appraise(final2.gam.allb, method = "simulate")& theme_bw() # Partial effects manag.gam.b.2<-draw(parametric_effects(final2.gam.allb,terms = "plough.manag")) & labs(x="Management",y="Partial effect",title= "") & theme_bw() soil.gam.b.2<-draw(parametric_effects(final2.gam.allb,terms = "soil.units")) & labs(x="Soil units",y="Partial effect",title= "") & theme_bw() aspect.gam.b.2<-draw(final2.gam.allb,terms = "aspect.mean") & labs(x="Aspect",y="Partial effect",title= "") & theme_bw() temp.gam.b.2<-draw(final2.gam.allb,terms = "PPT.MAR.OCT") & labs(x="PPT",y="Partial effect",title= "") & theme_bw() s.asp.temp.final2.b<-draw(final2.gam.allb,residuals=TRUE,resid_col = "grey") & theme_bw() manag.gam.b.2 + soil.gam.b.2+ s.asp.temp.final2.b + plot_layout(ncol = 1, nrow=3) # follow scripts examples in 4.3 to reproduce the model validation plots #### 4.4. Generalised additive mixed model (Field as random factor) #### # with TEMP final1.gamm.allb<-gam(MEAN.TC ~plough.manag + soil.units + s(TEMP.MAR.OCT,k=6) + s(Field.Name, bs="re") , method = "REML",data=ESPI) # with PPT final2.gamm.allb<-gam(MEAN.TC ~plough.manag + soil.units + s(PPT.MAR.OCT,k=6) + s(Field.Name, bs="re") , method = "REML",data=ESPI) # follow scripts examples to reproduce the model diagnostic,validation and prediction plots #### 5. Modelling using a set of open-source variables (Supplementary material section E) #### #### 5.1. Linear model #### final.lm.open<- lm(MEAN.TC ~land.cover + ESPI.MAR.OCT + aspect.mean + slope.mean,data = ESPI) summary(final.lm.open) summary.aov(final.lm.open) anova(final.lm.open) AICc(final.lm.open) # relative importance of variables calc.relimp(final.lm.open, type = c("lmg"), rela = TRUE) # diagnostic plots resid_panel(final.lm.open, plots = c("resid","qq","hist","lev"),type = NA, bins = 30, smoother = FALSE, qqline = TRUE, qqbands = FALSE, scale = 1, theme = "bw", axis.text.size = 12, title.text.size = 12, title.opt = TRUE, nrow = NULL) set_theme(base = theme_bw()) plot_model(final.lm.open, type = "resid") plot_model(final.lm.open, type = "slope") # model validation: get predicted and observed values, r, RMSE and prediction plots following the script 3.1 as example #### 5.2. Linear mixed models #### # LMM with land cover final_lmer_Open1<- lmer(MEAN.TC ~land.cover + slope.mean + PPT.MAR.OCT + (1|Field.Name),data = ESPI) # final_lmm_open 1 summary(final_lmer_Open1) anova(final_lmer_Open1) aicc(final_lmer_Open1) r.squaredGLMM(final_lmer_Open1) # diagnostic plots set_theme(base = theme_bw()) par(mfrow=c(2,2)) plot_model(final_lmer_Open1, type = "diag") plot_model(final_lmer_Open1, type = "re", show.values = F) # LMM with ESPI final_lmer_Open2<- lmer(MEAN.TC ~ ESPI.MAR.OCT + slope.mean + PPT.MAR.OCT + (1|Field.Name),data = ESPI) #ESPI instead of land cover summary(final_lmer_Open2) anova(final_lmer_Open2) aicc(final_lmer_Open1) r.squaredGLMM(final_lmer_Open2) # diagnostic plots set_theme(base = theme_bw()) par(mfrow=c(2,2)) plot_model(final_lmer_Open2, type = "diag") plot_model(final_lmer_Open2, type = "re", show.values = F) #### 5.3. Generalised additive model #### # Best final model GAM final.gam.open<- gam(MEAN.TC ~ s(ESPI.MAR.OCT,k=6) +s(aspect.mean, k=6)+ s(slope.mean,k=8),method = "REML",data = ESPI) summary(final.gam.open) anova(final.gam.open) par(mfrow=c(2,2)) appraise(final.gam.open, method = "simulate")& theme_bw() gam.check(final.gam.open) concurvity(final.gam.open, full = TRUE) concurvity(final.gam.open, full = FALSE) # Partial effects s.open<-draw(final.gam.open,residuals=TRUE,resid_col = "grey") & theme_bw() # model validation: get predicted values and RMSE pred_gam.open <- as.data.frame(predict.gam(final.gam.open, ESPI, se.fit=TRUE, type="response")) pred_gam.open$obs <- ESPI$MEAN.TC pred_gam.open$cover <- ESPI$land.cover pred_gam.open%>% na.omit() pred_gam.open<-filter(pred_gam.open,!obs=="na") str(pred_gam.open) # measured vs predicted grouped by cover open.gam.pred.vs.obs.cover <- ggplot(pred_gam.open, aes(x=obs, y=fit, color=cover)) + geom_point(shape=20, size = 2, stroke = 1) + geom_abline(intercept=0, slope=1) + xlab("SOC measured") + ylab("SOC fitted")+ scale_y_continuous(limits = c(2, 7)) + scale_x_continuous(limits = c(2, 7))+ scale_color_manual(values = c("#E69F00","#117733"),labels = c("arable", "grasslands"))+ geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) + theme_bw()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_rect(fill = NA), legend.text = element_text(size = 12), legend.background = element_rect(fill = "transparent")) + geom_segment(aes(x = 6.3, y = 2, xend = 6.7, yend = 2),colour="black") + annotate(geom="text", x=6.95, y=2.03, label="1:1", size=6) open.gam.pred.vs.obs.cover #coeficient of determination r with(pred_gam.open,cor(obs, fit, method = c("pearson"))) #RMSE # function rmse2 <- function(x, y, na.rm = TRUE){ res <- sqrt(mean((x-y)^2, na.rm = na.rm)) return(res) } # call to function rmse2(x = pred_gam.open$obs, y = pred_gam.open$fit) # with eq to report as % mean.obser<-mean(pred_gam.open$obs) mean.obser rmse #13.07% rmse<-(0.6052815 *(100/mean.obser)) rmse #13.04397% # cross validation scores <- data.frame() devian.expl<-data.frame() set.seed(40) k <- 10 # For loop for each fold for (i in 1:k) { # Create weighting column. ESPI$weight <- sample(c(0,1),size=nrow(ESPI),replace=TRUE,prob=c(0.2,0.8)) ESPI$weight # Generate training dataset trainingdata <- ESPI[ESPI$weight == 1, ] # Generate test dataset testdata <- ESPI[ESPI$weight == 0, ] # Run GAM with training data m.training<-gam(MEAN.TC ~ s(ESPI.MAR.OCT,k=6) +s(aspect.mean, k=6)+ s(slope.mean,k=8),method = "REML",data = trainingdata) summary(m.training) # Predict using test data pred <- predict.gam(m.training,newdata = testdata) pred # Extract scores[i,1] <- summary(m.training)$r.sq devian.expl[i,1]<- summary(m.training)$dev.expl } scores devian.expl #Get average scores from each k-fold test av.r2.GMRF <- mean(scores$V1) #0.628242 av.r2.GMRF av.dev.GMRF <- mean(devian.expl$V1) #0.68 av.dev.GMRF #print scores folds<-c(1,2,3,4,5,6,7,8,9,10) scores2 <- cbind(scores, folds) dev<-cbind(devian.expl,folds) cv.open.r<-ggplot(data=scores2, aes(x=folds, y=V1, group=1)) + geom_boxplot(color="#00b2ca") + geom_point() + ylab('R-sq.(adj)')+ scale_x_continuous(breaks=seq(1, 10,1))+ annotate(geom="text", x=8, y=0.71, label="model R-sq.(adj)=0.64", size=4)+ theme_classic() cv.open.r #### Reproduce Figure 4 #### pred.gam.open.ESPI<-ggpredict(final.gam.open,terms=c("ESPI.MAR.OCT")) plot.gam.open.ESPI<-ggplot(pred.gam.open.ESPI, aes(x,predicted))+ geom_smooth(method="gam", formula = y ~s(x))+ geom_ribbon(aes(ymin=conf.low,ymax=conf.high),alpha=.1)+ labs(x=expression(ESPI~(MJ~m^-2 ~d^-1)),y="SOC (%)", title="c")+ scale_y_continuous(limits = c(0, 8)) + theme_classic() plot.gam.open.ESPI.dat<-plot.gam.open.ESPI + geom_point(data=ESPI, aes(ESPI.MAR.OCT,MEAN.TC,color=land.cover), size = 2,alpha=.5)+ scale_color_manual(values = c("#E69F00","#117733"),labels = c("arable", "grasslands"))+ theme(text = element_text(size=14))+ theme(legend.position="none")+ annotate(geom="text", x=4.5, y=8, label="F=5.2", size=4)+ annotate(geom="text", x=4.5, y=7.6, label="edf=2.4", size=4) pred.gam.open.aspect<-ggpredict(final.gam.open,terms=c("aspect.mean")) plot.gam.open.aspect<-ggplot(pred.gam.open.aspect, aes(x,predicted))+ geom_smooth(method="gam", formula = y ~s(x))+ geom_ribbon(aes(ymin=conf.low,ymax=conf.high),alpha=.1)+ scale_y_continuous(limits = c(0, 8)) + labs(x="Aspect (°)",y="SOC (%)", title="b")+ coord_cartesian(xlim=c(67.5, 337.5)) + scale_x_continuous(breaks=seq(67.5, 337.5,45))+ theme_classic()+ theme(text = element_text(size=14))+ annotate(geom="text", x=292, y=8, label="F=5.3", size=4)+ annotate(geom="text", x=292, y=7.6, label="edf=4.6", size=4)+ annotate(geom="text", x=90, y=0, label="E", size=4)+ annotate(geom="text", x=135, y=0, label="SE", size=4)+ annotate(geom="text", x=180, y=0, label="S", size=4)+ annotate(geom="text", x=225, y=0, label="SW", size=4)+ annotate(geom="text", x=270, y=0, label="W", size=4)+ annotate(geom="text", x=315, y=0, label="NW", size=4) plot.gam.open.aspect.dat<-plot.gam.open.aspect + geom_point(data=ESPI, aes(aspect.mean, MEAN.TC,color=land.cover),size = 2,alpha=.5)+ scale_color_manual(values = c("#E69F00","#117733"),labels = c("arable", "grasslands"))+ theme(legend.position="none") pred.gam.open.slope<-ggpredict(final.gam.open,terms=c("slope.mean")) plot.gam.open.slope<-ggplot(pred.gam.open.slope, aes(x,predicted))+ geom_smooth(method="gam", formula = y ~s(x))+ geom_ribbon(aes(ymin=conf.low,ymax=conf.high),alpha=.1)+ labs(x="Slope (°)",y="SOC (%)", title="a")+ scale_y_continuous(limits = c(0, 8)) + theme_classic()+ theme(text = element_text(size=14), legend.title = element_blank(), legend.position = c(.2, .95), legend.background = element_rect(fill = "transparent")) + annotate(geom="text", x=5.4, y=8, label="F=16.2", size=4)+ annotate(geom="text", x=5.4, y=7.6, label="edf=6.0", size=4) plot.gam.open.slope.dat<-plot.gam.open.slope + geom_point(data=ESPI, aes(slope.mean, MEAN.TC,color= land.cover),size = 2,alpha=.5)+ scale_color_manual(values = c("#E69F00","#117733"),labels = c("arable", "grasslands")) #add data plot.gam.open.slope.dat + plot.gam.open.aspect.dat +plot.gam.open.ESPI.dat # final model with land cover final.gam.openb<- gam(MEAN.TC ~ land.cover +s(aspect.mean, k=6)+ s(slope.mean,k=8),method = "REML",data = ESPI) #alternative summary(final.gam.openb) # get diagnostic, model validation and predictions following the previous script as example #### 5.4. Generalised additive mixed model final.gamm.open<- gam(MEAN.TC ~ s(ESPI.MAR.OCT,k=6) + s(slope.mean,k=8) +s(Field.Name, bs="re"),method = "REML",data = ESPI) summary(final.gamm.open) par(mfrow=c(2,2)) appraise(final.gamm.open, method = "simulate")& theme_bw() concurvity(final.gamm.open, full = TRUE) concurvity(final.gamm.open, full = FALSE) #Partial effects gamm.open<-draw(final.gamm.open,residuals=TRUE,resid_col = "grey") & theme_bw() # model validation: get predicted values pred_gamm.open <- as.data.frame(predict.gam(final.gamm.open, ESPI, se.fit=TRUE, type="response")) pred_gamm.open$obs <- ESPI$MEAN.TC pred_gamm.open$cover <- ESPI$land.cover pred_gamm.open%>% na.omit() pred_gamm.open<-filter(pred_gamm.open,!obs=="na") str(pred_gamm.open) # measured vs predicted grouped by cover open.gamm.pred.vs.obs.cover <- ggplot(pred_gamm.open, aes(x=obs, y=fit, color=cover)) + geom_point(shape=20, size = 2, stroke = 1) + geom_abline(intercept=0, slope=1) + xlab("SOC measured") + ylab("SOC fitted")+ scale_y_continuous(limits = c(2, 7)) + scale_x_continuous(limits = c(2, 7))+ scale_color_manual(values = c("#E69F00","#117733"),labels = c("arable", "grasslands"))+ geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) + theme_bw()+ theme(text = element_text(size=12), legend.title = element_blank(), legend.position = c(.15, .91), legend.key = element_rect(fill = NA), legend.text = element_text(size = 12), legend.background = element_rect(fill = "transparent")) + geom_segment(aes(x = 6.3, y = 2, xend = 6.7, yend = 2),colour="black") + annotate(geom="text", x=6.95, y=2.03, label="1:1", size=6) open.gamm.pred.vs.obs.cover # coefficient of determination with(pred_gamm.open,cor(obs, fit, method = c("pearson"))) #RMSE # function rmse2 <- function(x, y, na.rm = TRUE){ res <- sqrt(mean((x-y)^2, na.rm = na.rm)) return(res) } # call to function rmse2(x = pred_gamm.open$obs, y = pred_gamm.open$fit) # 0.5033977 # with eq to report as % mean.obser<-mean(pred_gamm.open$obs) mean.obser rmse<-(0.5033977*(100/mean.obser)) rmse #10.84835 # plot model predictions following previous scripts as example #### end ####