FGBNMS_ Report

FGBNMS_

select only unique coverage points
# Load necessary libraries
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
select only unique coverage points
library(glue)
library(here)
here() starts at /home/tylar/repos/rvc_habitat_cover
select only unique coverage points
# Regex for the raw input files: "<prefix><4-digit year>.csv".
# Anchored at both ends so that look-alike names (e.g. "FGBNMS_2018.csv.bak"
# or "old_FGBNMS_2018.csv") are not picked up by accident.
pattern <- paste0("^", params$inputFilePrefix, "\\d{4}\\.csv$")

# List all files matching the pattern
files <- list.files(
  path = here("data/01_raw"),
  pattern = pattern,
  full.names = TRUE
)

# Fail early with a clear message instead of letting an empty file list
# surface later as an obscure error in the combine/plot steps.
if (length(files) == 0) {
  stop(
    "No input files matching '", pattern, "' found in ", here("data/01_raw"),
    call. = FALSE
  )
}

# Read one raw CSV and reduce it to the unique coverage points it contains.
#
# Keeps latitude, longitude, YEAR and HABITAT_CD, and adds:
#   - date:                "YYYY-MM-DD" string built from YEAR/MONTH/DAY
#   - `system:time_start`: that date at 00:00 UTC, in milliseconds since the
#                          Unix epoch (POSIXct seconds * 1000)
#
# @param file Path to a CSV with columns LAT_DEGREES, LON_DEGREES, YEAR,
#   MONTH, DAY and HABITAT_CD.
# @return A data frame with one row per distinct combination of the kept
#   columns.
read_unique_points <- function(file) {
  # Announce the file *before* reading so a failed read is easy to attribute
  print(glue("reading {file}..."))

  read.csv(file) %>%
    select(LAT_DEGREES, LON_DEGREES, YEAR, MONTH, DAY, HABITAT_CD) %>%
    mutate(
      date = sprintf("%04d-%02d-%02d", YEAR, MONTH, DAY),
      `system:time_start` = as.numeric(
        as.POSIXct(date, format = "%Y-%m-%d", tz = "UTC")
      ) * 1000
    ) %>%
    select(-MONTH, -DAY) %>%
    rename(latitude = LAT_DEGREES, longitude = LON_DEGREES) %>%
    # Keep only the rows with unique values across the selected columns
    distinct()
}

# One reduced data frame per input file (no list growing inside a loop, and
# no per-file temporaries left behind in the global environment)
data_list <- lapply(files, read_unique_points)
reading /home/tylar/repos/rvc_habitat_cover/data/01_raw/FGBNMS_2018.csv...
reading /home/tylar/repos/rvc_habitat_cover/data/01_raw/FGBNMS_2022.csv...
reading /home/tylar/repos/rvc_habitat_cover/data/01_raw/FGBNMS_2023.csv...
select only unique coverage points
# Combine all data into a single dataframe.
# Each element of data_list was de-duplicated on its own, so run distinct()
# once more on the stacked result: a row repeated across files would
# otherwise survive and be counted twice downstream.
combined_data <- data_list %>%
  bind_rows() %>%
  distinct()
map the points
library(sf)
Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.4.0; sf_use_s2() is TRUE
map the points
library(ggplot2)
library(ggspatial)
# Turn the combined table into an sf point layer: the longitude/latitude
# columns become the geometry, tagged with CRS 4326
selected_data_sf <- combined_data %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# Map the points over an OpenStreetMap tile basemap, coloured by habitat code
ggplot(selected_data_sf) +
  annotation_map_tile(type = "osm") +
  geom_sf(mapping = aes(color = HABITAT_CD), size = 2, alpha = 0.7) +
  labs(
    title = "Map of Latitude and Longitude Points with Basemap",
    x = "Longitude",
    y = "Latitude",
    color = "Habitat Code"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
Loading required namespace: raster
Zoom: 11
Fetching 2 missing tiles

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
...complete!

| Habitat code | Habitat description |
|--------------|---------------------|
| CONT_LR | Continuous, low relief |
| ISOL_MR | Isolated, mid relief |
| ISOL_LR | Isolated, low relief |
| SPGR_LR | Spur and groove, low relief |
| SPGR_HR | Spur and groove, high relief |
| RUBB_LR | Rubble, low relief |
| SAND_NA | Sand |
| SGRS_NA | Seagrass |
| UCHB_LR | Unconsolidated hardbottom |
see distribution of cover points
library(ggplot2)

# Number of rows in combined_data for each habitat code
ggplot(data = combined_data, mapping = aes(x = HABITAT_CD)) +
  geom_bar() +
  labs(
    title = "Bar Chart of HABITAT_CD",
    x = "Habitat Code",
    y = "Count"
  ) +
  theme_minimal() +
  # Tilt the habitat codes so the labels do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

see distribution of points over time
library(ggplot2)

# Stacked bar chart: number of points per survey year, split by habitat code.
# YEAR is mapped as a factor so each year present in the data gets its own
# evenly spaced, labelled bar; as a raw number it would be drawn on a
# continuous axis with gaps and axis breaks at years that have no data.
ggplot(combined_data, aes(x = factor(YEAR), fill = HABITAT_CD)) +
  geom_bar() +
  labs(title = "Points Collected Over Time", x = "Year", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

final data cleaning & write to csv
# Drop YEAR ahead of export; the sampling date is still carried by the
# `date` and `system:time_start` columns.
combined_data <- select(combined_data, -YEAR)

# Write the reduced table under data/02_reduced, named after the report
# parameter.
# NOTE(review): the input pattern reads params$inputFilePrefix while this
# path reads params$inputFile -- confirm both parameters are defined (on a
# plain list, `$` would partial-match inputFile to inputFilePrefix).
write.csv(
  combined_data,
  file = here(glue("data/02_reduced/{params$inputFile}.csv")),
  row.names = FALSE
)