get data across all programs
library("here")
source(here("R/getAllData.R"))
<- getAllData() df
Data ingestion and initial analysis from FL WIN water quality database and other sources. Below are some statistics on data across all analytes and programs. For more information on specific analytes and providers, see the analyte reports and provider reports.
Data for each analyte can be downloaded from the relevant analyte report. The full compiled data can be downloaded from the University of South Florida here
Florida Coral Reef Water Quality Database Compilation (FCRWQDC). This work is a product of the University of South Florida Institute for Marine Remote Sensing (IMaRS), funded by the Florida Department of Environmental Protection (FDEP).
# reduce to only cols we need & save to csv
df %>%
mutate(
source = program,
site = Monitoring.Location.ID,
datetime = Activity.Start.Date.Time,
analyte = DEP.Analyte.Name,
value = DEP.Result.Value.Number,
units = DEP.Result.Unit,
latitude = Org.Decimal.Latitude,
longitude = Org.Decimal.Longitude,
sample_depth = Activity.Depth,
.keep = "none") %>%
write.csv(here("data", "exports", "allData.csv"))
[1] "Temperature" "Salinity"
[3] "Ammonium" "Nitrite"
[5] "Nitrate" "Nitrate+Nitrite"
[7] "Orthophosphate" "Silicate"
[9] "Chlorophyll_a" "Pheophytin"
[11] "Dissolved_Oxygen" "pH"
[13] "Specific_Conductivity" "Turbidity"
[15] "Total_Kjeldahl_Nitrogen" "Phosphorus"
[17] "Total_Nitrogen" "Fecal_Coliforms"
[19] "Enterococci" "Ammonia"
[21] "Ammonia_plus_Ammonium" "Phosphorus__orthophosphate_as_PO4"
[23] "Phosphorus_as_PO4" "Nitrate.Nitrite"
[25] "TN" "TP"
[27] "Si"
Name | df |
Number of rows | 945624 |
Number of columns | 95 |
_______________________ | |
Column type frequency: | |
character | 66 |
logical | 3 |
numeric | 26 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
DEP.Result.ID | 567407 | 0.40 | 7 | 43 | 0 | 325598 | 0 |
Activity.ID | 567407 | 0.40 | 7 | 33 | 0 | 86374 | 0 |
time | 887184 | 0.06 | 5 | 5 | 0 | 1421 | 0 |
Activity.Start.Date.Time | 95 | 1.00 | 0 | 19 | 9950 | 48020 | 0 |
Monitoring.Location.ID | 1508 | 1.00 | 0 | 14 | 22213 | 1751 | 0 |
Activity.Type | 567407 | 0.40 | 1 | 35 | 0 | 14 | 0 |
Activity.Depth.Unit | 567407 | 0.40 | 0 | 7 | 24857 | 5 | 0 |
Activity.Depth.Top.Bottom.Unit | 887184 | 0.06 | 4 | 4 | 0 | 2 | 0 |
Sample.Collection.Type | 567407 | 0.40 | 0 | 22 | 10452 | 7 | 0 |
Value.Qualifier | 406239 | 0.57 | 0 | 5 | 184005 | 127 | 0 |
Result.Comments | 625804 | 0.34 | 0 | 874 | 280333 | 936 | 0 |
DEP.Analyte.Name | 0 | 1.00 | 2 | 33 | 0 | 27 | 0 |
DEP.Result.Unit | 5376 | 0.99 | 0 | 10 | 24230 | 20 | 0 |
original.analyte.name | 0 | 1.00 | 2 | 44 | 0 | 67 | 0 |
program | 0 | 1.00 | 3 | 10 | 0 | 8 | 0 |
ProgramName | 922583 | 0.02 | 36 | 36 | 0 | 1 | 0 |
Habitat | 922583 | 0.02 | 12 | 12 | 0 | 1 | 0 |
IndicatorName | 922583 | 0.02 | 9 | 13 | 0 | 3 | 0 |
ParameterName | 922583 | 0.02 | 2 | 23 | 0 | 10 | 0 |
ParameterUnits | 922583 | 0.02 | 3 | 9 | 0 | 6 | 0 |
ProgramLocationID | 922583 | 0.02 | 1 | 2 | 0 | 55 | 0 |
ManagedAreaName | 922583 | 0.02 | 29 | 29 | 0 | 1 | 0 |
Region | 922583 | 0.02 | 2 | 2 | 0 | 1 | 0 |
ActivityType | 922583 | 0.02 | 5 | 6 | 0 | 2 | 0 |
SampleDate | 922583 | 0.02 | 23 | 23 | 0 | 2134 | 0 |
ActivityDepth_m | 922583 | 0.02 | 4 | 4 | 0 | 1 | 0 |
RelativeDepth | 922583 | 0.02 | 7 | 7 | 0 | 1 | 0 |
TotalDepth_m | 922583 | 0.02 | 4 | 4 | 0 | 1 | 0 |
MDL | 922583 | 0.02 | 4 | 4 | 0 | 1 | 0 |
PQL | 922583 | 0.02 | 4 | 4 | 0 | 1 | 0 |
DetectionUnit | 922583 | 0.02 | 4 | 4 | 0 | 2 | 0 |
ValueQualifier | 922583 | 0.02 | 4 | 4 | 0 | 1 | 0 |
ValueQualifierSource | 922583 | 0.02 | 4 | 4 | 0 | 1 | 0 |
SampleFraction | 922583 | 0.02 | 4 | 4 | 0 | 1 | 0 |
ResultComments | 922583 | 0.02 | 4 | 21 | 0 | 2 | 0 |
SEACAR_QAQCFlagCode | 922583 | 0.02 | 6 | 13 | 0 | 23 | 0 |
SEACAR_QAQC_Description | 922583 | 0.02 | 98 | 195 | 0 | 23 | 0 |
SEACAR_EventID | 922583 | 0.02 | 36 | 36 | 0 | 2394 | 0 |
ExportVersion | 922583 | 0.02 | 23 | 23 | 0 | 2 | 0 |
data_source | 916658 | 0.03 | 5 | 10 | 0 | 2 | 0 |
CLIENT SAMPLE ID | 939699 | 0.01 | 1 | 3 | 0 | 70 | 0 |
LAB SAMPLE ID | 939699 | 0.01 | 11 | 11 | 0 | 494 | 0 |
MATRIX | 939699 | 0.01 | 5 | 5 | 0 | 1 | 0 |
COLLECTED | 939699 | 0.01 | 10 | 10 | 0 | 23 | 0 |
ANALYTE | 939699 | 0.01 | 8 | 27 | 0 | 12 | 0 |
SAMPLE RESULT | 940669 | 0.01 | 3 | 9 | 0 | 1545 | 0 |
REPORTING LIMIT | 942167 | 0.00 | 1 | 6 | 0 | 10 | 0 |
UNITS | 939699 | 0.01 | 3 | 10 | 0 | 8 | 0 |
METHOD | 939699 | 0.01 | 10 | 29 | 0 | 8 | 0 |
DILUTION | 939699 | 0.01 | 1 | 2 | 0 | 5 | 0 |
ANALYZED | 939699 | 0.01 | 10 | 10 | 0 | 123 | 0 |
PREPARED | 939699 | 0.01 | 10 | 10 | 0 | 127 | 0 |
source_file | 939699 | 0.01 | 19 | 22 | 0 | 23 | 0 |
SAMPLE COLLECTION DATE | 939699 | 0.01 | 9 | 10 | 0 | 23 | 0 |
Organization.ID | 87406 | 0.91 | 7 | 9 | 0 | 10 | 0 |
Org.Latitude..DD.MM.SS.SSSS. | 844604 | 0.11 | 0 | 11 | 90448 | 22 | 0 |
Org.Longitude..DD.MM.SS.SSSS. | 844604 | 0.11 | 0 | 12 | 90448 | 22 | 0 |
WBID | 625847 | 0.34 | 0 | 6 | 37805 | 115 | 0 |
Matrix | 625847 | 0.34 | 13 | 21 | 0 | 4 | 0 |
Sampling.Agency.Name | 92782 | 0.90 | 7 | 58 | 0 | 9 | 0 |
DEP.Result.Value.Text | 736350 | 0.22 | 0 | 12 | 201123 | 2 | 0 |
Sample.Fraction | 625847 | 0.34 | 0 | 9 | 131120 | 3 | 0 |
Lab.ID | 625847 | 0.34 | 0 | 6 | 102313 | 10 | 0 |
source | 625847 | 0.34 | 3 | 9 | 0 | 6 | 0 |
Date | 940248 | 0.01 | 11 | 14 | 0 | 664 | 0 |
datetime | 940280 | 0.01 | 17 | 17 | 0 | 661 | 0 |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
Activity.Top.Depth | 945624 | 0 | NaN | : |
Activity.Bottom.Depth | 945624 | 0 | NaN | : |
Audit.Censored.Decisions | 945624 | 0 | NaN | : |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
year | 887184 | 0.06 | 2020.14 | 2.99 | 2014.00 | 2018.00 | 2021.00 | 2023.00 | 2024.00 | ▅▅▅▇▇ |
month | 887184 | 0.06 | 6.60 | 3.56 | 1.00 | 3.00 | 7.00 | 10.00 | 12.00 | ▇▅▅▅▇ |
day | 887184 | 0.06 | 14.53 | 8.56 | 1.00 | 7.00 | 14.00 | 22.00 | 31.00 | ▇▆▆▆▃ |
lat_deg | 887184 | 0.06 | 25.10 | 0.99 | 24.00 | 24.00 | 25.00 | 25.00 | 28.00 | ▅▇▂▂▁ |
lat_min | 887184 | 0.06 | 30.31 | 15.93 | 0.03 | 20.42 | 33.09 | 42.61 | 59.83 | ▆▅▇▇▅ |
Org.Decimal.Latitude | 568087 | 0.40 | 25.78 | 0.65 | 24.40 | 25.47 | 25.79 | 26.09 | 28.78 | ▂▇▂▁▁ |
lon_deg | 887184 | 0.06 | -81.14 | 0.82 | -85.00 | -82.00 | -81.00 | -81.00 | -80.00 | ▁▁▃▇▃ |
lon_min | 887184 | 0.06 | 26.48 | 17.05 | 0.00 | 12.17 | 22.69 | 42.98 | 60.00 | ▇▇▅▃▅ |
Org.Decimal.Longitude | 568087 | 0.40 | -80.51 | 0.69 | -85.02 | -80.44 | -80.21 | -80.12 | -79.29 | ▁▁▁▂▇ |
Activity.Depth | 544803 | 0.42 | 0.41 | 0.23 | 0.00 | 0.12 | 0.50 | 0.50 | 1.00 | ▃▁▇▂▁ |
DEP.Result.Value.Number | 22840 | 0.98 | 11.13 | 527.26 | -0.03 | 0.10 | 0.50 | 2.10 | 53465.00 | ▇▁▁▁▁ |
RowID | 922583 | 0.02 | 1009723.34 | 798651.12 | 486217.00 | 514368.00 | 534968.00 | 1551534.00 | 3870318.00 | ▇▂▁▁▁ |
ProgramID | 922583 | 0.02 | 4058.00 | 0.00 | 4058.00 | 4058.00 | 4058.00 | 4058.00 | 4058.00 | ▁▁▇▁▁ |
IndicatorID | 922583 | 0.02 | 6.72 | 0.64 | 6.00 | 6.00 | 7.00 | 7.00 | 8.00 | ▆▁▇▁▂ |
ParameterID | 922583 | 0.02 | 8.84 | 6.57 | 1.00 | 3.00 | 6.00 | 16.00 | 19.00 | ▇▅▁▂▆ |
AreaID | 922583 | 0.02 | 6.00 | 0.00 | 6.00 | 6.00 | 6.00 | 6.00 | 6.00 | ▁▁▇▁▁ |
ResultValue | 922583 | 0.02 | 12.86 | 41.22 | 0.00 | 0.29 | 4.89 | 24.47 | 5389.00 | ▇▁▁▁▁ |
Year | 922583 | 0.02 | 2019.17 | 2.12 | 2016.00 | 2017.00 | 2019.00 | 2021.00 | 2023.00 | ▇▆▇▃▅ |
Month | 922583 | 0.02 | 6.55 | 3.44 | 1.00 | 3.00 | 7.00 | 9.00 | 12.00 | ▇▅▅▆▇ |
OriginalLatitude | 922583 | 0.02 | 25.80 | 0.02 | 25.77 | 25.78 | 25.79 | 25.81 | 25.87 | ▆▇▂▁▂ |
OriginalLongitude | 922583 | 0.02 | -80.15 | 0.01 | -80.17 | -80.16 | -80.15 | -80.13 | -80.12 | ▇▆▅▅▇ |
Include | 922583 | 0.02 | 1.00 | 0.04 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
MADup | 922583 | 0.02 | 1.00 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▇▁▁ |
DEP.MDL | 735993 | 0.22 | 0.04 | 0.47 | 0.00 | 0.00 | 0.01 | 0.06 | 50.50 | ▇▁▁▁▁ |
DEP.PQL | 735993 | 0.22 | 0.15 | 2.28 | 0.00 | 0.01 | 0.03 | 0.10 | 250.00 | ▇▁▁▁▁ |
Station | 940248 | 0.01 | 289.71 | 80.79 | 200.00 | 231.75 | 270.50 | 314.25 | 509.00 | ▇▇▁▁▂ |
library(dplyr)
library(reshape2) # for melt()
library(ggplot2)
library(viridis)
library(RColorBrewer)# for scale_fill_distiller()
# 1. Extract & drop NA
vals_raw <- df$DEP.Result.Value.Number
vals_raw <- vals_raw[!is.na(vals_raw)]
# 2. Log-transform
v1 <- log10(vals_raw + 1)
# 3. Percentile of the log-values
pct1 <- ecdf(v1)(v1)
# 4. Grid dims
N <- length(pct1)
ncol <- ceiling(sqrt(N))
nrow <- ceiling(N / ncol)
# 5. Pad
pad_len <- (nrow * ncol) - N
p1_pad <- c(pct1, rep(NA, pad_len))
# 6. Matrix & melt
mat_p1 <- matrix(p1_pad, nrow = nrow, ncol = ncol, byrow = TRUE)
mat_long_p1 <- melt(mat_p1, varnames = c("row","col"), value.name = "pct_log")
# 7. Plot
ggplot(mat_long_p1, aes(x = col, y = row, fill = pct_log)) +
geom_tile(color = NA) +
scale_fill_distiller(
palette = "Spectral", # try "RdYlBu", "PuOr", "BrBG", etc.
direction = 1, # reverse=FALSE so low values start at red-ish end
na.value = "grey90", # color for the padded NA cells
guide = "none" # hide the legend; remove if you want a colorbar
) +
scale_y_reverse() +
theme_void() +
theme(legend.position = "none")
If you have visualization ideas for this data, please open a github issue here.