Fecal_Coliforms Report

get data across all programs, filter for analyte
library('here')
library('dplyr')
source(here("R/getAllData.R"))

# read from all source files
# full_df <- getAllData() %>%
#   mutate(
#     source = program,
#     site = Monitoring.Location.ID,
#     datetime = Activity.Start.Date.Time,
#     analyte = DEP.Analyte.Name,
#     value = DEP.Result.Value.Number,
#     units = DEP.Result.Unit,
#     latitude = Org.Decimal.Latitude,
#     longitude = Org.Decimal.Longitude,
#     sample_depth = Activity.Depth,
#     .keep = "none"
# )

# read from cached file produced by index.qmd
full_df <- read.csv(here("data", "exports", "allData.csv"))

df <- filter(full_df, analyte == params$analyte)
create .csv of analyte data
# save df to csv
# reduce to only cols we need & save to csv
df %>%
  write.csv(here("data", "exports", "unified-wq-db-samples", paste0(params$analyte, ".csv")))

Download data for this analyte here

Histogram of value:

display histogram of values
library(ggplot2)
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  labs(title = "Histogram of Values", x = "Value", y = "Count")

Station Statistics:

create station statistics dataframe
source(here("R/seasonalMannKendall.R"))
library(lubridate)  # for mdy_hms()
library(pander)  # for display

# create table of samples for each station
samples_df <- df %>%
  # drop any with empty Monitoring.Location.ID
  filter(!is.na(site)) %>%
  # drop any with empty Activity.Start.Date.Time
  filter(!is.na(datetime)) %>%
  # parse the "MM/DD/YYYY HH:MM:SS" strings into POSIXct
  mutate(datetime = mdy_hms(
          datetime,
          tz = "UTC")) %>%
  distinct()


# add statistics for each station
sample_stats_df <- samples_df %>%
  group_by(source, site) %>%
  reframe(
    {
      tmp <- seasonalMannKendallVectorized(
        datetime,
        value
      )
    },
    n_values = n(),
    mean = mean(value),
    min = min(value),
    max = max(value),
    coefficient.of.variation = sd(value) / mean(value)
  ) %>%
  mutate(
    # create column significant_slope
    significant_slope = ifelse(z <= 0.05, slope, NA_real_),
    pvalue = z
  ) %>%
  # drop unwanted columns added by seasonalMannKendall
  select(
    -z,
    -tau,
    -chi_square
  )



# print(head(sample_stats_df))
# # display sample_stats_df with pander
# pander(sample_stats_df)
save stats to csv
sample_stats_df %>%
  write.csv(here(
    "data", "exports", "seasonal-mann-kendall-stats", paste0(params$analyte, ".csv")))

Download data for this analyte’s statistics here

display with gt
library(gt)
library(scales)
library(tidyselect)  # for all_of()
library(RColorBrewer) # for brewer.pal()

# ── color_column() ─────────────────────────────────────────────────────────────
# gt_tbl   : a gt object that you’ve already created (e.g. `sample_stats_df %>% gt()`)
# df       : the original data.frame (must contain the column you want to color)
# column   : a string, e.g. "slope" or "n_values"
# palette  : a character vector of colours to feed to col_numeric()
#
color_column <- function(gt_tbl, df, column, 
                         palette = c("red", "orange", "yellow", "green", "blue", "violet"),
                         domain = NULL) {
  # 1) Pull out that column’s numeric values
  vals <- df[[column]]
  if (!is.numeric(vals)) {
    stop(sprintf("`%s` is not numeric; data_color() requires a numeric column.", column))
  }
  
  # 2) Compute its min and max (ignoring NA)
  min_val <- min(vals, na.rm = TRUE)
  max_val <- max(vals, na.rm = TRUE)
  if (is.null(domain)) {
    domain <- c(min_val, max_val)
  }
  
  # 3) Call data_color() on the gt table for that single column
  gt_tbl %>%
    data_color(
      columns = all_of(column),
      colors  = col_numeric(
        palette = palette,
        domain  = domain
      )
    )
}
library(dplyr)
library(gt)

# 1) First build your gt table as usual:
gt_tbl <- sample_stats_df %>% 
  gt()

# slope blue (-) to red (+) (0 centered)
tryCatch({
  min_slope <- min(sample_stats_df$slope,  na.rm = TRUE)
  max_slope <- max(sample_stats_df$slope,  na.rm = TRUE)
  max_abs_slope <- max(abs(min_slope), abs(max_slope))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "slope",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_slope, max_abs_slope)
  )
}, error = function(e) {
  print("Error in slope color column")
  print(e)
})
[1] "Error in slope color column"
<rlang_error in col_numeric(palette = palette, domain = domain): Wasn't able to determine range of `domain`>
display with gt
tryCatch({
  # pvalue Z
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "z", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in z color column")
  print(e)
})
[1] "Error in z color column"
<simpleError in color_column(gt_tbl, df = sample_stats_df, column = "z", palette = (scales::brewer_pal(palette = "Blues"))(9)): `z` is not numeric; data_color() requires a numeric column.>
display with gt
# slope blue (-) to red (+) (0 centered)
tryCatch({
  min_slope <- min(sample_stats_df$significant_slope,  na.rm = TRUE)
  max_slope <- max(sample_stats_df$significant_slope,  na.rm = TRUE)
  max_abs_slope <- max(abs(min_slope), abs(max_slope))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "significant_slope",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_slope, max_abs_slope)
  )
}, error = function(e) {
  print("Error in significant_slope color column")
  print(e)
})
[1] "Error in significant_slope color column"
<simpleError in color_column(gt_tbl, df = sample_stats_df, column = "significant_slope",     palette = rev(brewer.pal(11, "RdBu")), domain = c(-max_abs_slope,         max_abs_slope)): `significant_slope` is not numeric; data_color() requires a numeric column.>
display with gt
tryCatch({
  # mean values blue to red (0 centered)
  min_mean <- min(sample_stats_df$mean,  na.rm = TRUE)
  max_mean <- max(sample_stats_df$mean,  na.rm = TRUE)
  max_abs_mean <- max(abs(min_mean), abs(max_mean))
  gt_tbl <- color_column(
    gt_tbl, 
    df     = sample_stats_df, 
    column = "mean",
    palette = rev(brewer.pal(11, "RdBu")),
    domain  = c(-max_abs_mean, max_abs_mean)
  )
}, error = function(e) {
  print("Error in mean color column")
  print(e)
})

tryCatch({
  # n values white to green
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "n_values", 
    palette = scales::brewer_pal(palette = "Greens")(9)
  )
}, error = function(e) {
  print("Error in n_values color column")
  print(e)
})

tryCatch({
  # min
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "min", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in min color column")
  print(e)
})

tryCatch({
  # max
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "max", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in max color column")
  print(e)
})

tryCatch({
  # coefficient.of.variation
  gt_tbl <- color_column(
    gt_tbl, 
    df      = sample_stats_df, 
    column  = "coefficient.of.variation", 
    palette = scales::brewer_pal(palette = "Blues")(9)
  )
}, error = function(e) {
  print("Error in coefficient.of.variation color column")
  print(e)
})

# 4) Render/display:
gt_tbl
source site slope n_values mean min max coefficient.of.variation significant_slope pvalue
MiamiBeach #1 NA 1 7.000000 7 7 NA NA NA
MiamiBeach #10 NA 1 1.000000 1 1 NA NA NA
MiamiBeach #11 NA 1 49.000000 49 49 NA NA NA
MiamiBeach #12 NA 1 120.000000 120 120 NA NA NA
MiamiBeach #13 NA 1 6.000000 6 6 NA NA NA
MiamiBeach #14 NA 1 31.000000 31 31 NA NA NA
MiamiBeach #15 NA 1 3.000000 3 3 NA NA NA
MiamiBeach #16 NA 1 32.000000 32 32 NA NA NA
MiamiBeach #17 NA 1 32.000000 32 32 NA NA NA
MiamiBeach #18 NA 1 60.000000 60 60 NA NA NA
MiamiBeach #19 NA 1 16.000000 16 16 NA NA NA
MiamiBeach #2 NA 1 12.000000 12 12 NA NA NA
MiamiBeach #20 NA 1 9.000000 9 9 NA NA NA
MiamiBeach #21 NA 1 5.000000 5 5 NA NA NA
MiamiBeach #22 NA 1 3.000000 3 3 NA NA NA
MiamiBeach #23 NA 1 18.000000 18 18 NA NA NA
MiamiBeach #24 NA 1 2.000000 2 2 NA NA NA
MiamiBeach #3 NA 1 5.000000 5 5 NA NA NA
MiamiBeach #3A NA 1 70.000000 70 70 NA NA NA
MiamiBeach #3B NA 1 302.000000 302 302 NA NA NA
MiamiBeach #4 NA 1 1.000000 1 1 NA NA NA
MiamiBeach #5 NA 1 11.000000 11 11 NA NA NA
MiamiBeach #6 NA 1 110.000000 110 110 NA NA NA
MiamiBeach #7 NA 1 8.000000 8 8 NA NA NA
MiamiBeach #8 NA 1 19.000000 19 19 NA NA NA
MiamiBeach #9 NA 1 10.000000 10 10 NA NA NA
MiamiBeach 1 NA 11 NA NA NA NA NA NA
MiamiBeach 10 NA 11 NA NA NA NA NA NA
MiamiBeach 11 NA 11 NA NA NA NA NA NA
MiamiBeach 12 NA 11 NA NA NA NA NA NA
MiamiBeach 13 NA 11 NA NA NA NA NA NA
MiamiBeach 14 NA 11 NA NA NA NA NA NA
MiamiBeach 15 NA 11 NA NA NA NA NA NA
MiamiBeach 16 NA 11 NA NA NA NA NA NA
MiamiBeach 17 NA 11 NA NA NA NA NA NA
MiamiBeach 18 NA 11 NA NA NA NA NA NA
MiamiBeach 19 NA 11 NA NA NA NA NA NA
MiamiBeach 2 NA 11 9.363636 1 31 1.225387 NA NA
MiamiBeach 20 NA 11 NA NA NA NA NA NA
MiamiBeach 21 NA 11 NA NA NA NA NA NA
MiamiBeach 22 NA 11 NA NA NA NA NA NA
MiamiBeach 23 NA 11 NA NA NA NA NA NA
MiamiBeach 24 NA 11 NA NA NA NA NA NA
MiamiBeach 25 NA 10 NA NA NA NA NA NA
MiamiBeach 26 NA 11 NA NA NA NA NA NA
MiamiBeach 27 NA 11 NA NA NA NA NA NA
MiamiBeach 28 NA 11 NA NA NA NA NA NA
MiamiBeach 29 NA 11 NA NA NA NA NA NA
MiamiBeach 3 NA 11 NA NA NA NA NA NA
MiamiBeach 30 NA 11 NA NA NA NA NA NA
MiamiBeach 31 NA 11 11.545455 1 60 1.538402 NA NA
MiamiBeach 32 NA 11 NA NA NA NA NA NA
MiamiBeach 33 NA 11 NA NA NA NA NA NA
MiamiBeach 34 NA 11 NA NA NA NA NA NA
MiamiBeach 35 NA 11 NA NA NA NA NA NA
MiamiBeach 36 NA 11 NA NA NA NA NA NA
MiamiBeach 37 NA 11 NA NA NA NA NA NA
MiamiBeach 38 NA 11 31.818182 1 117 1.034073 NA NA
MiamiBeach 39 NA 11 NA NA NA NA NA NA
MiamiBeach 3A NA 8 NA NA NA NA NA NA
MiamiBeach 3B NA 9 NA NA NA NA NA NA
MiamiBeach 3b NA 1 9.000000 9 9 NA NA NA
MiamiBeach 4 NA 11 NA NA NA NA NA NA
MiamiBeach 40 NA 11 36.363636 1 110 1.141517 NA NA
MiamiBeach 41 NA 11 NA NA NA NA NA NA
MiamiBeach 5 NA 11 NA NA NA NA NA NA
MiamiBeach 6 NA 11 NA NA NA NA NA NA
MiamiBeach 7 NA 11 NA NA NA NA NA NA
MiamiBeach 8 NA 11 NA NA NA NA NA NA
MiamiBeach 9 NA 11 NA NA NA NA NA NA

Reporting Programs:

display reporting programs
library(ggplot2)
ggplot(df, aes(x = source)) +
  geom_bar() +
  labs(title = "Reporting Programs", x = "Program", y = "Count")