DVT Dataset Prep

This workflow prepares an export of the data modified for display on the FCRWQDC Data Visualization Tool.

load and prep data

library(here)
library(magrittr)
library(dplyr)

source(here("SEACARProgramCompare/mapProgramNameToShortName.R"))

df <- readr::read_delim(
  here("data/Discrete WQ - 10006.txt"),
  delim = "|"
) %>%
  mutate(ProgramName = mapProgramNameToShortName(ProgramName))


# keep only columns of interest
cols_of_interest <- c(
  "ProgramName", 
  "ProgramLocationID", 
  "OriginalLatitude", "OriginalLongitude", 
  "ActivityDepth_m", 
  "ParameterName", 
  "ParameterUnits",
  "ResultValue", 
  "SampleDate"
)
df <- df %>% dplyr::select(all_of(cols_of_interest))

# create data/exports if DNE
dir.create(here::here("data", "exports"), showWarnings = FALSE)

# # save to csv
# write.csv(df, here::here("data", "exports", "dashboardDataSEACAR.csv"), row.names = FALSE)

create dashboard stations file

# Creates dashboardStations.csv with station metadata and parameter counts
library(tidyr)
library(lubridate)

# Count points for each parameter at each station
station_parameter_counts <- df %>%
  group_by(ProgramName, ProgramLocationID, OriginalLatitude, OriginalLongitude, ParameterName) %>%
  count(name = "point_count") %>%
  ungroup()

# Calculate date range for each station
station_date_check <- df %>%
  group_by(ProgramName, ProgramLocationID) %>%
  summarise(
    startDate = min(SampleDate, na.rm = TRUE),
    endDate = max(SampleDate, na.rm = TRUE),
    .groups = "drop"
  )

# calculate min & max depths for station
station_depth_check <- df %>%
  group_by(ProgramName, ProgramLocationID) %>%
  summarise(
    minDepth = min(ActivityDepth_m, na.rm = TRUE),
    maxDepth = max(ActivityDepth_m, na.rm = TRUE),
    .groups = "drop"
  )

# Create station summary with separate columns for each parameter
dashboard_stations <- station_parameter_counts %>%
  pivot_wider(
    names_from = ParameterName,
    values_from = point_count,
    values_fill = 0
  ) %>%
  left_join(station_date_check, by = c("ProgramName", "ProgramLocationID")) %>%
  left_join(station_depth_check, by = c("ProgramName", "ProgramLocationID")) %>%
  arrange(ProgramName, ProgramLocationID)

# Save dashboard stations file
write.csv(dashboard_stations, here::here("data", "exports", "dashboardStations.csv"), row.names = FALSE)

cat("Created dashboardStations.csv with", nrow(dashboard_stations), "stations\n")

Created dashboardStations.csv with 2475 stations

create individual station files

# Creates individual CSV files for each station named ProgramName.ProgramLocationID.csv

# Get unique stations
stations <- df %>%
  distinct(ProgramName, ProgramLocationID) %>%
  arrange(ProgramName, ProgramLocationID)

stationDataPath <- here::here("data", "exports", "stationData")
# create stationDataPath if DNE
dir.create(stationDataPath, showWarnings = FALSE)

# Create individual station files
for (i in 1:nrow(stations)) {
  station_name <- stations$ProgramName[i]
  station_id <- stations$ProgramLocationID[i]
  
  # Filter data for this station, drop coumns lat,lon,programName,station id 
  station_data <- df %>%
    filter(ProgramName == station_name, ProgramLocationID == station_id) %>%
    dplyr::select(-OriginalLatitude, -OriginalLongitude, -ProgramName, -ProgramLocationID)
  
  # Create filename (replace / in station_id with _ to avoid path issues)
  safe_station_id <- gsub("/", "_", station_id)
  filename <- paste0(station_name, ".", safe_station_id, ".csv")
  filepath <- here::here(stationDataPath, filename)
  
  # Save station data
  write.csv(station_data, filepath, row.names = FALSE)
}

cat("Created", nrow(stations), "individual station CSV files\n")

Created 2327 individual station CSV files