BFI predictors, Parita Bay, Panama

A spatial modeling approach

Authors

Diego J. Lizcano

Jorge Velásquez-Tibata

Published

May 22, 2024

Question

Which are the best spatial predictors for BFI?

Set up analysis

Load libraries and set some options.

Code

library(gt)
library(lubridate)
library(stringr)
library(readxl)
library(sf)
library(MuMIn) # multimodel inference
library(metafor)
eval(metafor:::.MuMIn) # helper functions we need so that MuMIn and metafor can interact 
library(visreg) # see trend
library(MASS) # stepAIC
library(terra)
library(sjPlot)
# library(mapview)
library(corrplot)
library(DT)

library(tidyverse)



options(scipen=99999)
options(max.print=99999)
options(stringsAsFactors=F)

Load Data

Code
covs <- read_csv("C:/CodigoR/AudubonPanama/shp/sites_covs_parita_nona.csv") |> mutate(site=Name) 

BFI_site<- read.csv("C:/CodigoR/AudubonPanama/data/BFI_site.csv", header = TRUE) # |> left_join(covs)
# convierte covs a puntos terra
# puntos <- vect(BFI_site, geom=c("Longitude", "Latitude"), crs="EPSG:4326")
# convierte a sf
# BFI_sf <- sf::st_as_sf(puntos)

Correlation in rasters

First examine correlation between possible predictors as raster layers

Code
BGB_Spawn <- rast("C:/CodigoR/AudubonPanama/raster/BGB Spawn.tif")
AGB_Spawn <- rast("C:/CodigoR/AudubonPanama/raster/AGB Spawn.tif")
NDVI <- rast("C:/CodigoR/AudubonPanama/raster/S2_NDVI_median_v2.tif")
roads <- rast("C:/CodigoR/AudubonPanama/raster/roads_final_v2.tif")
carbon_stock <- rast("C:/CodigoR/AudubonPanama/raster/soil_organic_carbon_stock_0-30m.tif")
canopy <- rast("C:/CodigoR/AudubonPanama/raster/canopy_height_jetz2.tif")
human_foot <- rast("C:/CodigoR/AudubonPanama/raster/human_footprint.tif")
river <- rast("C:/CodigoR/AudubonPanama/raster/rivers_final_v2.tif")
coast <- rast("C:/CodigoR/AudubonPanama/raster/coast_final_v2.tif")

forest_integrity <- rast("C:/CodigoR/AudubonPanama/raster/forest_integrity_index.tif")
# make elevation equal
# srtm_projected <- projectRaster(srtm_crop, crs=projection(NDVI), method="ngb") 
# elev <- mask(resample(rast(srtm_projected), NDVI), NDVI)

# list of terras SpatRasters  
many_rasters <- list(BGB_Spawn, AGB_Spawn, human_foot, NDVI, river, canopy, roads, forest_integrity, coast)
# terra stack
covs_many_raster <- rast(many_rasters)
names(covs_many_raster) <- c("BGB_Spawn","AGB_Spawn", 
                        "human_foot", "NDVI", "river",  
                        "canopy", "roads", "forest_integrity",
                        "coast")

plot(covs_many_raster)

Code

co <- layerCor(covs_many_raster, "pearson")
corrplot(co$correlation)

Correlation in sites (points) where the Audiomoth was installed

Code
# extract values from raster 
# covs_all1 <- terra::extract(covs_many_raster, puntos) 

# saved to avoid conflict between terra::extract and dplyr
# saveRDS(covs_all1, "C:/CodigoR/AudubonPanama/data/BFI/covs_all.RDS")
# save(covs_all, file = "C:/CodigoR/AudubonPanama/data/BFI/covs_all.Rda")
covs_all <- readRDS("C:/CodigoR/AudubonPanama/data/BFI/covs_all.RDS")
#load the rda file
# covs_all <- load(file = "C:/CodigoR/AudubonPanama/data/BFI/covs_all.Rda")
# write.csv(covs_all)

# covs_all$site <- puntos$site # ad site name

# change NA to 0 
# covs_all <- substr(covs_all, NA, 0) #r eplace in terra
covs_all[is.na(covs_all)] <- 0
M = cor(covs_all[,c(-1, -11)]) # removes ID and site
corrplot(M)

layer removed: BGB_Spawn,

Which model predicts BFI the best?

Information-theoretic approaches provide methods for model selection and (multi)model inference that differ quite a bit from more traditional methods based on null hypothesis testing (e.g., Anderson, 2008; Burnham & Anderson, 2002). These methods can also be used in the meta-analytic context when model fitting is based on likelihood methods.

We will now examine the fit and plausibility of various models, focusing on models that contain none, one, and up to eight of these possible predictors covariates.

With level = 1, we stick to models with main effects only. This implies that there are \(2^8\) = 256 possible models in the candidate set to consider. Since we want to keep the results for all these models (the default is to only keep up to 100 model fits), We set confsetsize=256. With crit=“AICc”, we select the Akaike Information Criterion corrected by small sample size, in this case: the AICc that we would like to compute for each model and that should be used for model selection and multimodel inference.

lets put all data in the same table

Code

# put in a table
dat1 <- BFI_site |> left_join(covs_all)
dat <- dat1 |> dplyr::select(bfi_unscaled, AGB_Spawn, human_foot, NDVI, river, canopy, roads, forest_integrity, coast) #bfi_scaled_scale

Next, we fit the full models with:

Code


full <- lm(bfi_unscaled~., data=dat)

# Now we can fit all 256 models and examine those models whose AICc value is no more than 2 units away from that of the best model with:
options(na.action = "na.fail")
res <- dredge(full, 
              rank = "AICc", 
              beta="none", 
              # extra = c("R^2"),
              m.lim = c(1, 2))
# subset(res, delta <= 2, recalc.weights=FALSE)
summary(model.avg(res))
#> 
#> Call:
#> model.avg(object = res)
#> 
#> Component model call: 
#> lm(formula = bfi_unscaled ~ <36 unique rhs>, data = dat)
#> 
#> Component models: 
#>    df  logLik   AICc delta weight
#> 5   3 -106.28 220.56  0.00   0.09
#> 1   3 -106.35 220.70  0.14   0.08
#> 6   3 -106.47 220.94  0.37   0.07
#> 4   3 -106.56 221.12  0.56   0.07
#> 8   3 -106.80 221.61  1.04   0.05
#> 2   3 -106.88 221.76  1.19   0.05
#> 68  4 -105.10 221.84  1.28   0.05
#> 56  4 -105.11 221.85  1.28   0.05
#> 3   3 -107.10 222.20  1.64   0.04
#> 15  4 -105.41 222.45  1.89   0.03
#> 7   3 -107.25 222.50  1.93   0.03
#> 16  4 -105.46 222.56  2.00   0.03
#> 24  4 -105.48 222.59  2.02   0.03
#> 46  4 -105.52 222.67  2.11   0.03
#> 45  4 -105.90 223.43  2.86   0.02
#> 17  4 -105.97 223.59  3.02   0.02
#> 12  4 -105.99 223.62  3.05   0.02
#> 25  4 -106.06 223.75  3.19   0.02
#> 36  4 -106.06 223.76  3.19   0.02
#> 14  4 -106.13 223.90  3.33   0.02
#> 35  4 -106.16 223.96  3.39   0.02
#> 58  4 -106.19 224.02  3.46   0.02
#> 18  4 -106.24 224.11  3.55   0.01
#> 57  4 -106.24 224.11  3.55   0.01
#> 26  4 -106.25 224.14  3.58   0.01
#> 13  4 -106.27 224.18  3.62   0.01
#> 23  4 -106.39 224.41  3.84   0.01
#> 48  4 -106.40 224.43  3.86   0.01
#> 47  4 -106.41 224.45  3.89   0.01
#> 67  4 -106.42 224.48  3.91   0.01
#> 34  4 -106.56 224.75  4.19   0.01
#> 28  4 -106.59 224.82  4.25   0.01
#> 78  4 -106.63 224.90  4.34   0.01
#> 38  4 -106.80 225.24  4.68   0.01
#> 27  4 -106.88 225.39  4.83   0.01
#> 37  4 -107.08 225.79  5.22   0.01
#> 
#> Term codes: 
#>        AGB_Spawn           canopy            coast forest_integrity 
#>                1                2                3                4 
#>       human_foot             NDVI            river            roads 
#>                5                6                7                8 
#> 
#> Model-averaged coefficients:  
#> (full average) 
#>                      Estimate   Std. Error  Adjusted SE z value Pr(>|z|)   
#> (Intercept)      2116.1250613  770.3443689  805.2709280   2.628  0.00859 **
#> human_foot         -3.7771240    8.7347453    9.1248547   0.414  0.67892   
#> AGB_Spawn          -2.3477508    5.8822383    6.1551340   0.381  0.70288   
#> NDVI             -672.3124189 1474.4659910 1538.0805656   0.437  0.66203   
#> forest_integrity    4.5153092   13.0028653   13.6493565   0.331  0.74079   
#> roads              -0.0037845    0.0135467    0.0142888   0.265  0.79112   
#> canopy             -1.2238164    4.5327548    4.7985293   0.255  0.79869   
#> coast               0.0034196    0.0223125    0.0240736   0.142  0.88704   
#> river               0.0003115    0.0051902    0.0056295   0.055  0.95587   
#>  
#> (conditional average) 
#>                      Estimate   Std. Error  Adjusted SE z value Pr(>|z|)   
#> (Intercept)       2116.125061   770.344369   805.270928   2.628  0.00859 **
#> human_foot         -15.042136    11.592792    12.733390   1.181  0.23748   
#> AGB_Spawn          -10.190900     8.382297     9.193579   1.108  0.26765   
#> NDVI             -2484.595203  1879.260918  2059.090224   1.207  0.22757   
#> forest_integrity    22.498787    20.924972    22.884708   0.983  0.32554   
#> roads               -0.022407     0.025870     0.028135   0.796  0.42579   
#> canopy              -7.581272     8.892665     9.718161   0.780  0.43532   
#> coast                0.027505     0.057809     0.063237   0.435  0.66360   
#> river                0.002691     0.015043     0.016350   0.165  0.86930   
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(res)

The graph shows the number of times the variable was selected in each model

Now we refit best linear model

Code

# 8. Refit best linear model
bestmodel <- get.models(res, 1)[[1]]
z <- lm(bestmodel, data = dat)
tab_model(summary(z))
  bfi_unscaled
Predictors Estimates CI p
(Intercept) 3259.63 1285.52 – 5233.75 0.004
AGB Spawn -9.52 -25.84 – 6.79 0.227
human foot -16.17 -39.71 – 7.37 0.160
NDVI -2514.72 -6259.15 – 1229.70 0.169
Observations 16
R2 / R2 adjusted 0.326 / 0.157

They are not statistically significant. However let’s go the next step

Checking assumptions

Code
library(see)
library(performance)
result2 <- check_normality(z)
plot(result2, type = "density")

Code
plot(result2, type = "qq")

Code
plot(result2, type = "pp")

Code

result3 <- check_heteroscedasticity(z)
plot(result3)

Code
check_predictions(z, check_range = TRUE)

Code


out <- check_model(z)
plot(out, type = "discrete_both")

In seems our “best” model according to the AIC:

bfi_unscaled ~ human_foot

Do not meet normality and the posterior predictive check is not that good. But normality is that important…?

lets try stepwise model selection by AIC

Let’s try analyzing the data using stepAIC() from the MASS package instead. Despite its name the method is not carrying out stepwise multiple regression. Rather, it is using a stepwise search strategy (hopefully) to find the “best” model (the model minimizing the AIC score) given certain restrictions.

Code
# 
z1 <- stepAIC(full, direction = "both")
#> Start:  AIC=177.7
#> bfi_unscaled ~ AGB_Spawn + human_foot + NDVI + river + canopy + 
#>     roads + forest_integrity + coast
#> 
#>                    Df Sum of Sq    RSS    AIC
#> - roads             1      5398 351368 175.95
#> - human_foot        1      9560 355530 176.14
#> - NDVI              1     15168 361138 176.39
#> - river             1     23466 369436 176.75
#> - forest_integrity  1     25961 371931 176.86
#> - coast             1     34262 380232 177.22
#> - canopy            1     40220 386190 177.46
#> - AGB_Spawn         1     44288 390258 177.63
#> <none>                          345970 177.70
#> 
#> Step:  AIC=175.95
#> bfi_unscaled ~ AGB_Spawn + human_foot + NDVI + river + canopy + 
#>     forest_integrity + coast
#> 
#>                    Df Sum of Sq    RSS    AIC
#> - human_foot        1      5149 356517 174.19
#> - forest_integrity  1     23214 374582 174.98
#> - river             1     30789 382158 175.30
#> - NDVI              1     37933 389301 175.59
#> - AGB_Spawn         1     39004 390372 175.64
#> - coast             1     39409 390777 175.65
#> - canopy            1     42982 394351 175.80
#> <none>                          351368 175.95
#> + roads             1      5398 345970 177.70
#> 
#> Step:  AIC=174.18
#> bfi_unscaled ~ AGB_Spawn + NDVI + river + canopy + forest_integrity + 
#>     coast
#> 
#>                    Df Sum of Sq    RSS    AIC
#> - NDVI              1     32966 389484 173.60
#> - AGB_Spawn         1     42824 399341 174.00
#> - forest_integrity  1     47359 403876 174.18
#> <none>                          356517 174.19
#> - coast             1     49381 405899 174.26
#> - river             1     63398 419915 174.80
#> - canopy            1     79883 436400 175.42
#> + human_foot        1      5149 351368 175.95
#> + roads             1       988 355530 176.14
#> 
#> Step:  AIC=173.6
#> bfi_unscaled ~ AGB_Spawn + river + canopy + forest_integrity + 
#>     coast
#> 
#>                    Df Sum of Sq    RSS    AIC
#> - coast             1     43986 433470 173.31
#> - AGB_Spawn         1     50602 440086 173.55
#> <none>                          389484 173.60
#> - forest_integrity  1     52671 442155 173.63
#> + NDVI              1     32966 356517 174.19
#> + roads             1     18018 371465 174.84
#> - river             1     88039 477523 174.86
#> - canopy            1    101458 490942 175.30
#> + human_foot        1       182 389301 175.59
#> 
#> Step:  AIC=173.31
#> bfi_unscaled ~ AGB_Spawn + river + canopy + forest_integrity
#> 
#>                    Df Sum of Sq    RSS    AIC
#> - AGB_Spawn         1     36659 470129 172.61
#> - river             1     53126 486595 173.16
#> <none>                          433470 173.31
#> + coast             1     43986 389484 173.60
#> - canopy            1     68284 501754 173.65
#> - forest_integrity  1     72168 505638 173.78
#> + NDVI              1     27571 405899 174.26
#> + human_foot        1      4284 429185 175.15
#> + roads             1      2659 430811 175.21
#> 
#> Step:  AIC=172.61
#> bfi_unscaled ~ river + canopy + forest_integrity
#> 
#>                    Df Sum of Sq    RSS    AIC
#> - river             1     28278 498407 171.54
#> <none>                          470129 172.61
#> + AGB_Spawn         1     36659 433470 173.31
#> + NDVI              1     34464 435665 173.39
#> - canopy            1     89940 560069 173.41
#> + coast             1     30043 440086 173.55
#> - forest_integrity  1    123837 593966 174.35
#> + human_foot        1      5205 464924 174.43
#> + roads             1      3266 466863 174.50
#> 
#> Step:  AIC=171.55
#> bfi_unscaled ~ canopy + forest_integrity
#> 
#>                    Df Sum of Sq    RSS    AIC
#> <none>                          498407 171.54
#> - canopy            1     72385 570793 171.72
#> + NDVI              1     48542 449865 171.91
#> - forest_integrity  1     95559 593966 172.35
#> + river             1     28278 470129 172.61
#> + human_foot        1     19310 479097 172.91
#> + AGB_Spawn         1     11812 486595 173.16
#> + coast             1      9182 489226 173.25
#> + roads             1       259 498148 173.54
tab_model(summary(z1))
  bfi_unscaled
Predictors Estimates CI p
(Intercept) 1798.56 1393.56 – 2203.57 <0.001
canopy -11.80 -30.35 – 6.75 0.193
forest integrity 31.94 -11.77 – 75.64 0.138
Observations 16
R2 / R2 adjusted 0.199 / 0.076
Code

out2 <- check_model(z1)
plot(out2, type = "discrete_both")

In seems our “best” model according to the AIC:

bfi_unscaled ~ canopy + forest_integrity

Do not meet normality and the posterior predictive check is not that good.

Again no statistical significant predictor. However lets see the trends

Which model can be the best?

Lets compare the model performance

Code
lm1 <- lm(bfi_unscaled ~  human_foot, data = dat )
lm2 <- lm(bfi_unscaled ~  human_foot + human_foot:forest_integrity, data = dat)
lm3 <- lm(formula = bfi_unscaled ~ canopy + forest_integrity, data = dat)
result <- compare_performance(lm1, lm2, lm3)

DT::datatable(result)
Code
plot(result)

Sesion info

Code
print(sessionInfo(), locale = FALSE)
#> R version 4.3.2 (2023-10-31 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 19042)
#> 
#> Matrix products: default
#> 
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] DT_0.32              performance_0.11.0.9 see_0.8.4.1         
#>  [4] sjPlot_2.8.16        terra_1.7-71         forcats_1.0.0       
#>  [7] dplyr_1.1.4          purrr_1.0.2          readr_2.1.5         
#> [10] tidyr_1.3.1          tibble_3.2.1         ggplot2_3.5.1       
#> [13] tidyverse_2.0.0      corrplot_0.92        MASS_7.3-60         
#> [16] visreg_2.7.0         metafor_4.6-0        numDeriv_2016.8-1.1 
#> [19] metadat_1.2-0        Matrix_1.6-1.1       MuMIn_1.47.5        
#> [22] sf_1.0-15            readxl_1.4.3         stringr_1.5.1       
#> [25] lubridate_1.9.3      gt_0.10.1           
#> 
#> loaded via a namespace (and not attached):
#>  [1] DBI_1.2.2           sandwich_3.1-0      rlang_1.1.3        
#>  [4] magrittr_2.0.3      multcomp_1.4-25     e1071_1.7-14       
#>  [7] compiler_4.3.2      vctrs_0.6.5         pkgconfig_2.0.3    
#> [10] fastmap_1.1.1       ellipsis_0.3.2      labeling_0.4.3     
#> [13] effectsize_0.8.8    utf8_1.2.4          rmarkdown_2.27     
#> [16] tzdb_0.4.0          xfun_0.44           cachem_1.0.8       
#> [19] jsonlite_1.8.8      sjmisc_2.8.10       ggeffects_1.6.0    
#> [22] R6_2.5.1            bslib_0.6.1         stringi_1.8.3      
#> [25] jquerylib_0.1.4     cellranger_1.1.0    estimability_1.5   
#> [28] Rcpp_1.0.12         knitr_1.46          zoo_1.8-12         
#> [31] parameters_0.21.7.1 splines_4.3.2       timechange_0.3.0   
#> [34] tidyselect_1.2.1    rstudioapi_0.16.0   yaml_2.3.8         
#> [37] codetools_0.2-19    sjlabelled_1.2.0    lattice_0.22-5     
#> [40] withr_3.0.0         bayestestR_0.13.2.1 coda_0.19-4.1      
#> [43] evaluate_0.23       survival_3.5-7      units_0.8-5        
#> [46] proxy_0.4-27        xml2_1.3.6          pillar_1.9.0       
#> [49] KernSmooth_2.23-22  stats4_4.3.2        insight_0.19.11.2  
#> [52] generics_0.1.3      mathjaxr_1.6-0      hms_1.1.3          
#> [55] munsell_0.5.0       scales_1.3.0        xtable_1.8-4       
#> [58] class_7.3-22        glue_1.7.0          emmeans_1.10.1     
#> [61] tools_4.3.2         mvtnorm_1.2-4       grid_4.3.2         
#> [64] crosstalk_1.2.1     datawizard_0.10.0.4 colorspace_2.1-0   
#> [67] nlme_3.1-163        cli_3.6.2           fansi_1.0.6        
#> [70] sjstats_0.19.0      gtable_0.3.4        sass_0.4.8         
#> [73] digest_0.6.34       classInt_0.4-10     TH.data_1.1-2      
#> [76] farver_2.1.1        htmlwidgets_1.6.4   htmltools_0.5.7    
#> [79] lifecycle_1.0.4