library('here')library('dplyr')source(here("R/getAllData.R"))# read from all source files# full_df <- getAllData() %>%# mutate(# source = program,# site = Monitoring.Location.ID,# datetime = Activity.Start.Date.Time,# analyte = DEP.Analyte.Name,# value = DEP.Result.Value.Number,# units = DEP.Result.Unit,# latitude = Org.Decimal.Latitude,# longitude = Org.Decimal.Longitude,# sample_depth = Activity.Depth,# .keep = "none"# )# read from cached file produced by index.qmdfull_df <-read.csv(here("data", "exports", "allData.csv"))df <-filter(full_df, analyte == params$analyte)
create .csv of analyte data
# save df to csv# reduce to only cols we need & save to csvdf %>%write.csv(here("data", "exports", "unified-wq-db-samples", paste0(params$analyte, ".csv")))
library(ggplot2)ggplot(df, aes(x = value)) +geom_histogram(bins =30, fill ="blue", color ="black") +labs(title ="Histogram of Values", x ="Value", y ="Count")
Station Statistics:
create station statistics dataframe
source(here("R/seasonalMannKendall.R"))library(lubridate) # for mdy_hms()library(pander) # for display# create table of samples for each stationsamples_df <- df %>%# drop any with empty Monitoring.Location.IDfilter(!is.na(site)) %>%# drop any with empty Activity.Start.Date.Timefilter(!is.na(datetime)) %>%# parse the "MM/DD/YYYY HH:MM:SS" strings into POSIXctmutate(datetime =mdy_hms( datetime,tz ="UTC")) %>%distinct()# add statistics for each stationsample_stats_df <- samples_df %>%group_by(source, site) %>%reframe( { tmp <-seasonalMannKendallVectorized( datetime, value ) },n_values =n(),mean =mean(value),min =min(value),max =max(value),coefficient.of.variation =sd(value) /mean(value) ) %>%mutate(# create column significant_slopesignificant_slope =ifelse(z <=0.05, slope, NA_real_),pvalue = z ) %>%# drop unwanted columns added by seasonalMannKendallselect(-z,-tau,-chi_square )# print(head(sample_stats_df))# # display sample_stats_df with pander# pander(sample_stats_df)
library(gt)library(scales)library(tidyselect) # for all_of()library(RColorBrewer) # for brewer.pal()# ── color_column() ─────────────────────────────────────────────────────────────# gt_tbl : a gt object that you’ve already created (e.g. `sample_stats_df %>% gt()`)# df : the original data.frame (must contain the column you want to color)# column : a string, e.g. "slope" or "n_values"# palette : a character vector of colours to feed to col_numeric()#color_column <-function(gt_tbl, df, column, palette =c("red", "orange", "yellow", "green", "blue", "violet"),domain =NULL) {# 1) Pull out that column’s numeric values vals <- df[[column]]if (!is.numeric(vals)) {stop(sprintf("`%s` is not numeric; data_color() requires a numeric column.", column)) }# 2) Compute its min and max (ignoring NA) min_val <-min(vals, na.rm =TRUE) max_val <-max(vals, na.rm =TRUE)if (is.null(domain)) { domain <-c(min_val, max_val) }# 3) Call data_color() on the gt table for that single column gt_tbl %>%data_color(columns =all_of(column),colors =col_numeric(palette = palette,domain = domain ) )}library(dplyr)library(gt)# 1) First build your gt table as usual:gt_tbl <- sample_stats_df %>%gt()# slope blue (-) to red (+) (0 centered)tryCatch({ min_slope <-min(sample_stats_df$slope, na.rm =TRUE) max_slope <-max(sample_stats_df$slope, na.rm =TRUE) max_abs_slope <-max(abs(min_slope), abs(max_slope)) gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="slope",palette =rev(brewer.pal(11, "RdBu")),domain =c(-max_abs_slope, max_abs_slope) )}, error =function(e) {print("Error in slope color column")print(e)})
[1] "Error in slope color column"
<rlang_error in col_numeric(palette = palette, domain = domain): Wasn't able to determine range of `domain`>
display with gt
tryCatch({# pvalue Z gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="z", palette = scales::brewer_pal(palette ="Blues")(9) )}, error =function(e) {print("Error in z color column")print(e)})
[1] "Error in z color column"
<simpleError in color_column(gt_tbl, df = sample_stats_df, column = "z", palette = (scales::brewer_pal(palette = "Blues"))(9)): `z` is not numeric; data_color() requires a numeric column.>
display with gt
# slope blue (-) to red (+) (0 centered)tryCatch({ min_slope <-min(sample_stats_df$significant_slope, na.rm =TRUE) max_slope <-max(sample_stats_df$significant_slope, na.rm =TRUE) max_abs_slope <-max(abs(min_slope), abs(max_slope)) gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="significant_slope",palette =rev(brewer.pal(11, "RdBu")),domain =c(-max_abs_slope, max_abs_slope) )}, error =function(e) {print("Error in significant_slope color column")print(e)})
[1] "Error in significant_slope color column"
<simpleError in color_column(gt_tbl, df = sample_stats_df, column = "significant_slope", palette = rev(brewer.pal(11, "RdBu")), domain = c(-max_abs_slope, max_abs_slope)): `significant_slope` is not numeric; data_color() requires a numeric column.>
display with gt
tryCatch({# mean values blue to red (0 centered) min_mean <-min(sample_stats_df$mean, na.rm =TRUE) max_mean <-max(sample_stats_df$mean, na.rm =TRUE) max_abs_mean <-max(abs(min_mean), abs(max_mean)) gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="mean",palette =rev(brewer.pal(11, "RdBu")),domain =c(-max_abs_mean, max_abs_mean) )}, error =function(e) {print("Error in mean color column")print(e)})tryCatch({# n values white to green gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="n_values", palette = scales::brewer_pal(palette ="Greens")(9) )}, error =function(e) {print("Error in n_values color column")print(e)})tryCatch({# min gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="min", palette = scales::brewer_pal(palette ="Blues")(9) )}, error =function(e) {print("Error in min color column")print(e)})tryCatch({# max gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="max", palette = scales::brewer_pal(palette ="Blues")(9) )}, error =function(e) {print("Error in max color column")print(e)})tryCatch({# coefficient.of.variation gt_tbl <-color_column( gt_tbl, df = sample_stats_df, column ="coefficient.of.variation", palette = scales::brewer_pal(palette ="Blues")(9) )}, error =function(e) {print("Error in coefficient.of.variation color column")print(e)})# 4) Render/display:gt_tbl
source
site
slope
n_values
mean
min
max
coefficient.of.variation
significant_slope
pvalue
MiamiBeach
#1
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#10
NA
1
20.0000
20
20
NA
NA
NA
MiamiBeach
#11
NA
1
51.0000
51
51
NA
NA
NA
MiamiBeach
#12
NA
1
4350.0000
4350
4350
NA
NA
NA
MiamiBeach
#13
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#14
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#15
NA
1
20.0000
20
20
NA
NA
NA
MiamiBeach
#16
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#17
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#18
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#19
NA
1
NA
NA
NA
NA
NA
NA
MiamiBeach
#2
NA
1
31.0000
31
31
NA
NA
NA
MiamiBeach
#20
NA
1
20.0000
20
20
NA
NA
NA
MiamiBeach
#21
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#22
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#23
NA
1
63.0000
63
63
NA
NA
NA
MiamiBeach
#24
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#3
NA
1
NA
NA
NA
NA
NA
NA
MiamiBeach
#3A
NA
1
3650.0000
3650
3650
NA
NA
NA
MiamiBeach
#3B
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
#4
NA
1
NA
NA
NA
NA
NA
NA
MiamiBeach
#5
NA
1
20.0000
20
20
NA
NA
NA
MiamiBeach
#6
NA
1
359.0000
359
359
NA
NA
NA
MiamiBeach
#7
NA
1
31.0000
31
31
NA
NA
NA
MiamiBeach
#8
NA
1
20.0000
20
20
NA
NA
NA
MiamiBeach
#9
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
1
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
10
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
11
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
12
NA
11
661.1818
85
3430
1.50604
NA
NA
MiamiBeach
13
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
14
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
15
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
16
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
17
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
18
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
19
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
2
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
20
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
21
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
22
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
23
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
24
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
25
NA
10
NA
NA
NA
NA
NA
NA
MiamiBeach
26
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
27
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
28
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
29
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
3
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
30
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
31
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
32
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
33
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
34
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
35
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
36
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
37
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
38
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
39
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
3A
NA
8
NA
NA
NA
NA
NA
NA
MiamiBeach
3B
NA
9
NA
NA
NA
NA
NA
NA
MiamiBeach
3b
NA
1
10.0000
10
10
NA
NA
NA
MiamiBeach
4
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
40
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
41
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
5
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
6
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
7
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
8
NA
11
NA
NA
NA
NA
NA
NA
MiamiBeach
9
NA
11
NA
NA
NA
NA
NA
NA
Reporting Programs:
display reporting programs
library(ggplot2)ggplot(df, aes(x = source)) +geom_bar() +labs(title ="Reporting Programs", x ="Program", y ="Count")