index.qmd

---
title: "Land Cover Analysis"
---

```{r include=FALSE}
# Chunk defaults applied to every figure: fig.pos = "H" and an empty out.extra.
knitr::opts_chunk$set(fig.pos = "H", out.extra = "")
# Snapshot the global options before they are modified on the next line.
# NOTE(review): no restore from `backup_options` is visible in this document --
# confirm whether one is intended.
backup_options <- options()
# Bias printing towards fixed (non-scientific) notation and 2 significant
# digits.
options(scipen = 1, digits = 2)
# NOTE(review): `do_eval` is not referenced by the visible chunks, which
# hard-code eval=TRUE / eval=FALSE -- confirm whether it is still needed.
do_eval <- FALSE

# Loading ggplot as it is cumbersome to use :: notation for it
library(ggplot2)
```

## Loading data

We load the possible sites (`quiet = TRUE` suppresses verbose loading information). We also load the two halves of the far north land cover dataset, along with the attribute table of land cover classes. We load the ecodistrict data and select the relevant lowlands district, coded as 1028.

```{r message=FALSE}
# Candidate sites from the GRTS sample draw. `fullID` concatenates the sample
# id, the site id and the name of the source file.
sites_possible <- sf::st_read(
  "data/sites/GRTS_PossibleCaARU_sample_draw_base.shp",
  quiet = TRUE
) |>
  dplyr::mutate(
    source = "GRTS_PossibleCaARU_sample_draw_base",
    fullID = paste(SampleID, ID, source, sep = "_")
  )

# Additional peat sites come as lon/lat columns (EPSG:4326); turn them into
# points and reproject them to the CRS of `sites_possible`.
additional_sites <- readr::read_csv("data/sites/Selected_Peat_Sites.csv") |>
  sf::st_as_sf(coords = c("lon_WGS84", "lat_WGS84"), crs = 4326) |>
  sf::st_transform(sf::st_crs(sites_possible)) |>
  dplyr::mutate(
    source = "Selected_Peat_Sites",
    fullID = paste(SampleID, ID, source, sep = "_")
  )

# Both sets of sites stacked into a single table.
all_sites <- dplyr::bind_rows(sites_possible, additional_sites)

# Keep only the ecodistrict coded 1028.
ecodistrict <- sf::st_read(
  "data/ecodistrict_shp/Ecodistricts/ecodistricts.shp",
  quiet = TRUE
) |>
  dplyr::filter(ECODISTRIC == 1028)

# The land cover raster is delivered as two files, one per UTM zone.
lu_16 <- raster::raster("data/land_use/FarNorth_LandCover_Class_UTM16.tif")
lu_17 <- raster::raster("data/land_use/FarNorth_LandCover_Class_UTM17.tif")

# Attribute table of the land cover classes; `cats` is `code` as a factor and
# serves as the join key for the extracted raster values.
lu_dat <- readr::read_csv("data/land_use/attr_table_northen_ont_lc.txt") |>
  dplyr::mutate(cats = as.factor(code))
```

## Plotting spatial data

It is always a good idea to try and plot spatial data before any processing.

```{r fig.width=13}
# Draw the ecodistrict polygon, then the sites on top of it. The sites are
# reprojected to the ecodistrict CRS so both layers share one coordinate
# system.
ggplot() +
  geom_sf(data = ecodistrict) +
  geom_sf(
    data = all_sites |>
      sf::st_transform(sf::st_crs(ecodistrict))
  ) +
  theme_bw()
```

Plotting the land cover data is difficult because it is provided in two different UTM zones.

## Extracting Land Cover data

The following functions will take care of land cover extraction for sites.

```{r message=FALSE}
#' Extract raster cell values in a buffer around each site
#'
#' Reprojects the sites to the CRS of `lu`, buffers them by `scale_m`, extracts
#' the raster cells covered by each buffer with
#' `exactextractr::exact_extract()` and stacks the per-site tables.
#'
#' @param scale_m Buffer distance handed to `sf::st_buffer(dist = )`. The name
#'   suggests metres; the actual unit is that of the CRS of `lu`.
#' @param sites An sf object with a `fullID` column.
#' @param lu Raster layer to extract values from.
#' @return A data frame with one row per site and covered cell, holding `id`
#'   (position of the site in `sites`), `fullID`, `value` and
#'   `coverage_fraction`. Rows whose `value` is NA are dropped.
extract_from_points <- function(scale_m, sites, lu) {

  sites_buffer <- sites |>
    sf::st_transform(sf::st_crs(lu)) |>
    sf::st_buffer(dist = scale_m) |>
    dplyr::select(fullID)

  extr <- exactextractr::exact_extract(lu, sites_buffer,
                                       progress = FALSE,
                                       include_cols = "fullID")

  # Nothing extracted (no sites): return an empty table carrying the columns
  # used downstream rather than failing while stacking an empty list.
  if (length(extr) == 0) {
    return(data.frame(id = integer(0),
                      fullID = character(0),
                      value = numeric(0),
                      coverage_fraction = numeric(0)))
  }

  # Tag every per-site table with the position of its site. seq_along() is
  # used instead of 1:length(), which would yield c(1, 0) for an empty list.
  extr <- mapply(extr, seq_along(extr),
                 FUN = \(x, y) dplyr::mutate(x, id = y),
                 SIMPLIFY = FALSE)

  extr_df <- do.call(rbind, extr) |>
    dplyr::filter(!is.na(value)) |>
    dplyr::relocate(id)

  extr_df

}

#' Land cover proportions around sites at one buffer scale
#'
#' Extracts land cover cells around the sites from both UTM rasters, stacks
#' the two extractions and converts the summed coverage fractions into
#' proportions per land cover class.
#'
#' @param scale_m Buffer distance, forwarded to `extract_from_points()`.
#' @param sites An sf object with a `fullID` column.
#' @param lu_16,lu_17 The two land cover rasters (one per UTM zone).
#' @param lu_dat Attribute table of the land cover classes. It is joined on
#'   its `cats` column and must provide `category_code` and `label`.
#' @param summarise_all If `TRUE` (default), proportions are computed over all
#'   sites pooled together; if `FALSE`, they are computed within each site.
#' @return A tibble with columns `category_code`, `prop` and `label`, preceded
#'   by `fullID` when `summarise_all = FALSE`. Remaining NA entries are
#'   replaced by 0.
compute_land_cover <- function(scale_m, sites,
                               lu_16, lu_17, lu_dat,
                               summarise_all = TRUE) {

  extr_16_df <- extract_from_points(scale_m, sites, lu_16)
  extr_17_df <- extract_from_points(scale_m, sites, lu_17)

  # A site must not contribute cells from both rasters, otherwise its area
  # would be counted twice once the two tables are stacked. Sites are
  # identified by `fullID` in the extraction tables (they have no `siteID`
  # column, and comparing a missing column always passes).
  in_both <- intersect(extr_16_df$fullID, extr_17_df$fullID)
  if (length(in_both) > 0) {
    stop("Sites with land cover cells in both UTM rasters: ",
         paste(in_both, collapse = ", "),
         call. = FALSE)
  }

  extr <- rbind(extr_16_df, extr_17_df) |>
    dplyr::arrange(id, value)

  # The pooled and per-site summaries differ only in the grouping columns and
  # in whether `fullID` is kept in the output.
  group_cols <- if (summarise_all) "value" else c("fullID", "value")
  out_cols <- c(if (!summarise_all) "fullID",
                "category_code", "prop", "label")

  extr_table <- extr |>
    dplyr::group_by(dplyr::across(dplyr::all_of(group_cols))) |>
    # "drop_last" leaves the per-site table grouped by `fullID`, so the
    # proportions below are computed within each site; the pooled table ends
    # up ungrouped and its proportions are computed over all rows.
    dplyr::summarise(coverage_fraction_sum = sum(coverage_fraction),
                     .groups = "drop_last") |>
    dplyr::mutate(prop =
                    coverage_fraction_sum / sum(coverage_fraction_sum)) |>
    dplyr::ungroup() |>
    # `cats` is a factor in `lu_dat`; convert the join key accordingly.
    dplyr::mutate(value = as.factor(value)) |>
    dplyr::left_join(lu_dat, by = c("value" = "cats")) |>
    dplyr::select(dplyr::all_of(out_cols))

  extr_table[is.na(extr_table)] <- 0

  extr_table

}
```

We extract at different scales (buffer radius around points): 1 m, 50 m, 100 m and 1 km.

```{r message=FALSE}
# Run the pooled land cover summary once per buffer radius. The names of the
# radius vector become the `scale` column when the per-scale tables are
# stacked.
# NOTE(review): only `sites_possible` is analysed here, not `all_sites`
# (which also contains the additional peat sites) -- confirm this is intended.
res_points <- mapply(FUN = compute_land_cover,
                     c(`1 m` = 1, `50 m` = 50,
                       `100 m` = 100, `1 km` = 1000),
                     MoreArgs = list(
                       sites = sites_possible,
                       lu_16 = lu_16, lu_17 = lu_17, lu_dat = lu_dat),
                     SIMPLIFY = FALSE) |>
  dplyr::bind_rows(.id = "scale") |>
  # Order the scales from smallest to largest and the classes by proportion.
  dplyr::mutate(scale = forcats::fct_relevel(scale, "1 m", "50 m",
                                             "100 m", "1 km"),
                label = forcats::fct_reorder(label, prop)) |>
  dplyr::arrange(scale, dplyr::desc(prop))

knitr::kable(res_points)
```

We also want to do the same operation for the ecodistrict to allow for comparison. We don't need to use exact extraction; instead, we crop and mask each raster. This operation is costly so we write out the rasters and load them again (see unrendered code).

```{r eval=FALSE}
# Reproject the ecodistrict polygon to the CRS of each raster.
ecodistrict_16 <- ecodistrict |> sf::st_transform(sf::st_crs(lu_16))
ecodistrict_17 <- ecodistrict |> sf::st_transform(sf::st_crs(lu_17))

# Crop each raster to the ecodistrict, then mask the cropped raster with the
# same polygon.
lu_16_crop <- lu_16 |> raster::crop(ecodistrict_16)
lu_16_crop_mask <- lu_16_crop |> raster::mask(ecodistrict_16)

lu_17_crop <- lu_17 |> raster::crop(ecodistrict_17)
lu_17_crop_mask <- lu_17_crop |> raster::mask(ecodistrict_17)
```

```{r eval=FALSE, include=FALSE}
# Cache the masked rasters on disk, replacing any previous copy.
raster::writeRaster(
  lu_16_crop_mask,
  filename = "data/lu_16_crop_mask.tif",
  overwrite = TRUE
)
raster::writeRaster(
  lu_17_crop_mask,
  filename = "data/lu_17_crop_mask.tif",
  overwrite = TRUE
)
```

```{r eval=TRUE, include=FALSE}
# Reload the cached masked rasters from disk; the chunks that compute and
# write them are not evaluated (eval=FALSE).
lu_16_crop_mask <- raster::raster("data/lu_16_crop_mask.tif")
lu_17_crop_mask <- raster::raster("data/lu_17_crop_mask.tif")
```

We can then get the frequencies of values. This operation is also costly so we write out the objects and load them again (see unrendered code).

```{r eval=FALSE}
# Frequency table of the cell values of each masked raster.
lu_16_freq <- lu_16_crop_mask |> raster::freq()
lu_17_freq <- lu_17_crop_mask |> raster::freq()
```

```{r eval=FALSE, include=FALSE}
# Cache the frequency tables on disk so they need not be recomputed.
saveRDS(lu_16_freq, file = "data/lu_16_freq.rds")
saveRDS(lu_17_freq, file = "data/lu_17_freq.rds")
```

```{r eval=TRUE, include=FALSE}
# Reload the cached frequency tables; the chunks that compute and save them
# are not evaluated (eval=FALSE).
lu_16_freq <- readRDS("data/lu_16_freq.rds")
lu_17_freq <- readRDS("data/lu_17_freq.rds")
```

We combine the results of both UTM zones.

```{r}
# Stack the frequency tables of both rasters, add up the cell counts per land
# cover value and convert them into proportions of the ecodistrict.
res_ecodistrict <- rbind(lu_16_freq, lu_17_freq) |>
  as.data.frame() |>
  # Discard the count of cells without a value.
  dplyr::filter(!is.na(value)) |>
  dplyr::group_by(value) |>
  dplyr::summarise(count = sum(count), .groups = "drop") |>
  dplyr::mutate(
    prop = count / sum(count),
    # `cats` is a factor in `lu_dat`; convert the join key accordingly.
    value = as.factor(value)
  ) |>
  dplyr::left_join(lu_dat, by = c("value" = "cats")) |>
  # Values absent from the attribute table are dropped after `prop` has been
  # computed, so they still count towards the denominator.
  dplyr::filter(!is.na(label)) |>
  dplyr::mutate(scale = "Ecodistrict") |>
  dplyr::select(scale, category_code, prop, label) |>
  dplyr::arrange(scale, dplyr::desc(prop))

knitr::kable(res_ecodistrict)
```

And then combine the results between scales and UTM zones.

```{r}
# Stack the site-level and ecodistrict tables, make every scale/label
# combination explicit (proportion 0 where a class is absent) and order the
# classes by proportion.
res <- rbind(res_points, res_ecodistrict) |>
  tidyr::complete(scale, label, fill = list(prop = 0)) |>
  dplyr::mutate(label = forcats::fct_reorder(label, prop))
```

For individual site identity, at different scales:

```{r}
# Per-site land cover at each buffer radius, reduced to the two most abundant
# classes of every site and scale.
res_points_by_site <-
  mapply(FUN = compute_land_cover,
         c(`1 m` = 1, `50 m` = 50,
           `100 m` = 100, `1 km` = 1000),
         # `summarise_all` is constant across scales, so it belongs with the
         # other fixed arguments rather than among the vectorised ones.
         MoreArgs = list(
           sites = sites_possible,
           lu_16 = lu_16, lu_17 = lu_17, lu_dat = lu_dat,
           summarise_all = FALSE),
         SIMPLIFY = FALSE) |>
  dplyr::bind_rows(.id = "scale") |>
  dplyr::mutate(scale = forcats::fct_relevel(scale, "1 m", "50 m",
                                             "100 m", "1 km"),
                label = forcats::fct_reorder(label, prop)) |>
  # Sort all rows by decreasing proportion so that, within every scale/site
  # group, row 1 is the most abundant class and row 2 the runner-up.
  dplyr::arrange(dplyr::desc(prop)) |>
  dplyr::group_by(scale, fullID) |>
  dplyr::rename(primary_category_code = category_code,
                primary_prop = prop,
                primary_label = label) |>
  # NOTE(review): for a site with a single class the secondary_* columns and
  # therefore `prop_sum` are NA -- confirm this is the desired output.
  dplyr::mutate(secondary_category_code = primary_category_code[2],
                secondary_prop = primary_prop[2],
                secondary_label = primary_label[2],
                prop_sum = primary_prop + secondary_prop) |>
  # Keep only the row of the most abundant class per scale and site.
  dplyr::slice(1) |>
  dplyr::ungroup() |>
  dplyr::arrange(fullID)

DT::datatable(res_points_by_site)
```

We save this table.

```{r}
# Export the per-site table of primary and secondary land cover classes.
res_points_by_site |>
  readr::write_csv("outputs/res_points_by_site.csv")
```

## Results

We can plot the results with "dodged" ggplot2 barplots.

```{r fig.width=13, fig.height=8}

# Palette shared by the fill and outline colours of the bars.
my_pal <- c("#c7e9b4", "#7fcdbb", "#41b6c4", "#1d91c0", "#225ea8", "#0c2c84")

# One bar per land cover class and scale, placed side by side within a class.
ggplot(res, aes(x = label, y = prop, fill = scale, colour = scale)) +
  geom_col(alpha = 0.8, position = "dodge") +
  scale_fill_manual(values = my_pal) +
  scale_color_manual(values = my_pal) +
  labs(x = "Land Use Class", y = "Proportion",
       fill = "Scale", colour = "Scale") +
  theme_bw() +
  # Tilt the class labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1))
```

Removing the land use classes that are not present around sites, we get a graph that is slightly easier to read.

```{r fig.width=13, fig.height=8}
# Labels of the classes with a non-zero proportion at any site-level scale
# (the ecodistrict rows are ignored for this selection).
only_at_sites <- res |>
  dplyr::filter(prop > 0, scale != "Ecodistrict") |>
  dplyr::pull(label)

# Keep these classes for every scale, including the ecodistrict.
res_filt <- res |>
  dplyr::filter(label %in% only_at_sites)

# Same dodged bar plot as above, restricted to the retained classes.
ggplot(res_filt, aes(x = label, y = prop, fill = scale, colour = scale)) +
  geom_col(alpha = 0.8, position = "dodge") +
  scale_fill_manual(values = my_pal) +
  scale_color_manual(values = my_pal) +
  labs(x = "Land Use Class", y = "Proportion",
       fill = "Scale", colour = "Scale") +
  theme_bw() +
  # Tilt the class labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 20, vjust = 1, hjust = 1))
```