Enterococci Report

get data across all programs, filter for analyte
library('here')
library('dplyr')
source(here("R/getAllData.R"))

# read from all source files
# full_df <- getAllData() %>%
#   mutate(
#     source = program,
#     site = Monitoring.Location.ID,
#     datetime = Activity.Start.Date.Time,
#     analyte = DEP.Analyte.Name,
#     value = DEP.Result.Value.Number,
#     units = DEP.Result.Unit,
#     latitude = Org.Decimal.Latitude,
#     longitude = Org.Decimal.Longitude,
#     sample_depth = Activity.Depth,
#     .keep = "none"
# )

# read from cached file produced by index.qmd
full_df <- read.csv(here("data", "exports", "allData.csv"))

df <- filter(full_df, analyte == params$analyte)
create .csv of analyte data
# save df to csv
# reduce to only cols we need & save to csv
df %>%
  write.csv(here("data", "exports", "unified-wq-db-samples", paste0(params$analyte, ".csv")))

Download data for this analyte here

Histogram of value:

display histogram of values
library(ggplot2)
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  labs(title = "Histogram of Values", x = "Value", y = "Count")

Station Statistics:

create station statistics dataframe
source(here("R/seasonalMannKendall.R"))
library(lubridate)  # for mdy_hms()
library(pander)  # for display

# create table of samples for each station
samples_df <- df %>%
  # drop any with empty Monitoring.Location.ID
  filter(!is.na(site)) %>%
  # drop any with empty Activity.Start.Date.Time
  filter(!is.na(datetime)) %>%
  # parse the "MM/DD/YYYY HH:MM:SS" strings into POSIXct
  mutate(datetime = mdy_hms(
          datetime,
          tz = "UTC")) %>%
  distinct()


# add statistics for each station
sample_stats_df <- samples_df %>%
  group_by(source, site) %>%
  reframe(
    {
      tmp <- seasonalMannKendallVectorized(
        datetime,
        value
      )
    },
    n_values = n(),
    mean = mean(value),
    min = min(value),
    max = max(value),
    coefficient.of.variation = sd(value) / mean(value)
  ) %>%
  mutate(
    # create column significant_slope
    significant_slope = ifelse(z <= 0.05, slope, NA_real_),
    pvalue = z
  ) %>%
  # drop unwanted columns added by seasonalMannKendall
  select(
    -z,
    -tau,
    -chi_square
  )



# print(head(sample_stats_df))
# # display sample_stats_df with pander
# pander(sample_stats_df)
save stats to csv
sample_stats_df %>%
  write.csv(here(
    "data", "exports", "seasonal-mann-kendall-stats", paste0(params$analyte, ".csv")))

Download data for this analyte’s statistics here

display with gt
library(gt)
library(scales)
library(tidyselect)  # for all_of()
library(RColorBrewer) # for brewer.pal()

# ── color_column() ─────────────────────────────────────────────────────────────
# gt_tbl   : a gt object that you’ve already created (e.g. `sample_stats_df %>% gt()`)
# df       : the original data.frame (must contain the column you want to color)
# column   : a string, e.g. "slope" or "n_values"
# palette  : a character vector of colours to feed to col_numeric()
#
color_column <- function(gt_tbl, df, column, 
                         palette = c("red", "orange", "yellow", "green", "blue", "violet"),
                         domain = NULL) {
  # 1) Pull out that column’s numeric values
  vals <- df[[column]]
  if (!is.numeric(vals)) {
    stop(sprintf("`%s` is not numeric; data_color() requires a numeric column.", column))
  }
  
  # 2) Compute its min and max (ignoring NA)
  min_val <- min(vals, na.rm = TRUE)
  max_val <- max(vals, na.rm = TRUE)
  if (is.null(domain)) {
    domain <- c(min_val, max_val)
  }
  
  # 3) Call data_color() on the gt table for that single column
  gt_tbl %>%
    data_color(
      columns = all_of(column),
      colors  = col_numeric(
        palette = palette,
        domain  = domain
      )
    )
}
library(dplyr)
library(gt)

# 1) First build your gt table as usual:
gt_tbl <- sample_stats_df %>% 
  gt()

# slope blue (-) to red (+) (0 centered)
tryCatch({
  min_slope <- min(sample_stats_df$slope,  na.rm = TRUE)
  max_slope <- max(sample_stats_df$slope,  na.rm = TRUE)
  max_abs_slope <- max(abs(min_slope), abs(max_slope))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "slope",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_slope, max_abs_slope)
  )
}, error = function(e) {
  print("Error in slope color column")
  print(e)
})
[1] "Error in slope color column"
<rlang_error in col_numeric(palette = palette, domain = domain): Wasn't able to determine range of `domain`>
display with gt
tryCatch({
  # pvalue Z
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "z", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in z color column")
  print(e)
})
[1] "Error in z color column"
<simpleError in color_column(gt_tbl, df = sample_stats_df, column = "z", palette = (scales::brewer_pal(palette = "Blues"))(9)): `z` is not numeric; data_color() requires a numeric column.>
display with gt
# slope blue (-) to red (+) (0 centered)
tryCatch({
  min_slope <- min(sample_stats_df$significant_slope,  na.rm = TRUE)
  max_slope <- max(sample_stats_df$significant_slope,  na.rm = TRUE)
  max_abs_slope <- max(abs(min_slope), abs(max_slope))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "significant_slope",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_slope, max_abs_slope)
  )
}, error = function(e) {
  print("Error in significant_slope color column")
  print(e)
})
[1] "Error in significant_slope color column"
<simpleError in color_column(gt_tbl, df = sample_stats_df, column = "significant_slope",     palette = rev(brewer.pal(11, "RdBu")), domain = c(-max_abs_slope,         max_abs_slope)): `significant_slope` is not numeric; data_color() requires a numeric column.>
display with gt
tryCatch({
  # mean values blue to red (0 centered)
  min_mean <- min(sample_stats_df$mean,  na.rm = TRUE)
  max_mean <- max(sample_stats_df$mean,  na.rm = TRUE)
  max_abs_mean <- max(abs(min_mean), abs(max_mean))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "mean",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_mean, max_abs_mean)
  )
}, error = function(e) {
  print("Error in mean color column")
  print(e)
})

tryCatch({
  # n values white to green
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "n_values", 
    palette = scales::brewer_pal(palette = "Greens")(9)
  )
}, error = function(e) {
  print("Error in n_values color column")
  print(e)
})

tryCatch({
  # min
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "min", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in min color column")
  print(e)
})

tryCatch({
  # max
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "max", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in max color column")
  print(e)
})

tryCatch({
  # coefficient.of.variation
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "coefficient.of.variation", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in coefficient.of.variation color column")
  print(e)
})

# 4) Render/display:
gt_tbl
source site slope n_values mean min max coefficient.of.variation significant_slope pvalue
MiamiBeach #1 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #10 NA 1 20.0000 20 20 NA NA NA
MiamiBeach #11 NA 1 51.0000 51 51 NA NA NA
MiamiBeach #12 NA 1 4350.0000 4350 4350 NA NA NA
MiamiBeach #13 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #14 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #15 NA 1 20.0000 20 20 NA NA NA
MiamiBeach #16 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #17 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #18 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #19 NA 1 NA NA NA NA NA NA
MiamiBeach #2 NA 1 31.0000 31 31 NA NA NA
MiamiBeach #20 NA 1 20.0000 20 20 NA NA NA
MiamiBeach #21 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #22 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #23 NA 1 63.0000 63 63 NA NA NA
MiamiBeach #24 NA 1 10.0000 10 10 NA NA NA
MiamiBeach #3 NA 1 NA NA NA NA NA NA
MiamiBeach #3A NA 1 3650.0000 3650 3650 NA NA NA
MiamiBeach #3B NA 1 10.0000 10 10 NA NA NA
MiamiBeach #4 NA 1 NA NA NA NA NA NA
MiamiBeach #5 NA 1 20.0000 20 20 NA NA NA
MiamiBeach #6 NA 1 359.0000 359 359 NA NA NA
MiamiBeach #7 NA 1 31.0000 31 31 NA NA NA
MiamiBeach #8 NA 1 20.0000 20 20 NA NA NA
MiamiBeach #9 NA 1 10.0000 10 10 NA NA NA
MiamiBeach 1 NA 11 NA NA NA NA NA NA
MiamiBeach 10 NA 11 NA NA NA NA NA NA
MiamiBeach 11 NA 11 NA NA NA NA NA NA
MiamiBeach 12 NA 11 661.1818 85 3430 1.50604 NA NA
MiamiBeach 13 NA 11 NA NA NA NA NA NA
MiamiBeach 14 NA 11 NA NA NA NA NA NA
MiamiBeach 15 NA 11 NA NA NA NA NA NA
MiamiBeach 16 NA 11 NA NA NA NA NA NA
MiamiBeach 17 NA 11 NA NA NA NA NA NA
MiamiBeach 18 NA 11 NA NA NA NA NA NA
MiamiBeach 19 NA 11 NA NA NA NA NA NA
MiamiBeach 2 NA 11 NA NA NA NA NA NA
MiamiBeach 20 NA 11 NA NA NA NA NA NA
MiamiBeach 21 NA 11 NA NA NA NA NA NA
MiamiBeach 22 NA 11 NA NA NA NA NA NA
MiamiBeach 23 NA 11 NA NA NA NA NA NA
MiamiBeach 24 NA 11 NA NA NA NA NA NA
MiamiBeach 25 NA 10 NA NA NA NA NA NA
MiamiBeach 26 NA 11 NA NA NA NA NA NA
MiamiBeach 27 NA 11 NA NA NA NA NA NA
MiamiBeach 28 NA 11 NA NA NA NA NA NA
MiamiBeach 29 NA 11 NA NA NA NA NA NA
MiamiBeach 3 NA 11 NA NA NA NA NA NA
MiamiBeach 30 NA 11 NA NA NA NA NA NA
MiamiBeach 31 NA 11 NA NA NA NA NA NA
MiamiBeach 32 NA 11 NA NA NA NA NA NA
MiamiBeach 33 NA 11 NA NA NA NA NA NA
MiamiBeach 34 NA 11 NA NA NA NA NA NA
MiamiBeach 35 NA 11 NA NA NA NA NA NA
MiamiBeach 36 NA 11 NA NA NA NA NA NA
MiamiBeach 37 NA 11 NA NA NA NA NA NA
MiamiBeach 38 NA 11 NA NA NA NA NA NA
MiamiBeach 39 NA 11 NA NA NA NA NA NA
MiamiBeach 3A NA 8 NA NA NA NA NA NA
MiamiBeach 3B NA 9 NA NA NA NA NA NA
MiamiBeach 3b NA 1 10.0000 10 10 NA NA NA
MiamiBeach 4 NA 11 NA NA NA NA NA NA
MiamiBeach 40 NA 11 NA NA NA NA NA NA
MiamiBeach 41 NA 11 NA NA NA NA NA NA
MiamiBeach 5 NA 11 NA NA NA NA NA NA
MiamiBeach 6 NA 11 NA NA NA NA NA NA
MiamiBeach 7 NA 11 NA NA NA NA NA NA
MiamiBeach 8 NA 11 NA NA NA NA NA NA
MiamiBeach 9 NA 11 NA NA NA NA NA NA

Reporting Programs:

display reporting programs
library(ggplot2)
ggplot(df, aes(x = source)) +
  geom_bar() +
  labs(title = "Reporting Programs", x = "Program", y = "Count")