get data across all programs
library("here")
<- read.csv(here("data/exports/allDataRaw.csv"))
df_all source(here("R/dropNonStandardAnalytes.R"))
<- dropNonStandardAnalytes(df_all) df
Data ingestion and initial analysis from FL WIN water quality database and other sources. Below are some statistics on data across all analytes and programs. For more information on specific analytes and providers, see the analyte reports and provider reports.
Data for each analyte can be downloaded from the relevant analyte report. The full compiled data can be downloaded from the University of South Florida here
Florida Coral Reef Water Quality Database Compilation (FCRWQDC). This work is a product of the University of South Florida Institute for Marine Remote Sensing (IMaRS), funded by the Florida Department of Environmental Protection (FDEP).
library(ggplot2)
ggplot(df, aes(x = program, fill = DEP.Analyte.Name)) +
geom_bar() +
scale_y_log10() +
labs(
title = "Count of Observations by Program and Parameter",
x = "Program Name",
y = "Count (Log Scale)",
fill = "Parameter"
) +
theme_minimal() +
theme(
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1)
)
The following analytes are reported by some programs but not included in the output csv files or other analyses. These may be analytes that are not included in the SEACAR vocabulary or they may not have been properly mapped to the SEACAR vocabulary.
library(dplyr)
# 1. Get the list of analyte names that are in df_all but not in df
diff_analytes <- setdiff(unique(df_all$DEP.Analyte.Name), unique(df$DEP.Analyte.Name))
# 2. Filter df_all for those analytes and create a summary
analyte_programs_summary <- df_all %>%
filter(DEP.Analyte.Name %in% diff_analytes) %>%
group_by(DEP.Analyte.Name) %>%
summarise(Associated.Programs = paste(unique(program), collapse = ", "))
# 3. Print the resulting summary table
DT::datatable(analyte_programs_summary)
Name | df |
Number of rows | 1235477 |
Number of columns | 133 |
_______________________ | |
Column type frequency: | |
character | 63 |
complex | 1 |
logical | 16 |
numeric | 53 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
keyfield | 1159247 | 0.06 | 24 | 43 | 0 | 8468 | 0 |
Activity.ID | 910055 | 0.26 | 5 | 36 | 0 | 75763 | 0 |
time | 1159247 | 0.06 | 5 | 5 | 0 | 1423 | 0 |
Activity.Start.Date.Time | 564685 | 0.54 | 8 | 10 | 0 | 2610 | 0 |
Monitoring.Location.ID | 18 | 1.00 | 0 | 25 | 16624 | 2013 | 0 |
station_type | 1159265 | 0.06 | 1 | 1 | 0 | 2 | 0 |
depth_class | 1159265 | 0.06 | 3 | 7 | 0 | 3 | 0 |
depth_order | 1159265 | 0.06 | 4 | 5 | 0 | 12 | 0 |
notes | 1235405 | 0.00 | 1 | 75 | 0 | 8 | 9 |
DEP.Analyte.Name | 0 | 1.00 | 2 | 41 | 0 | 20 | 0 |
Habitat | 963802 | 0.22 | 12 | 21 | 0 | 5 | 0 |
IndicatorName | 1211526 | 0.02 | 9 | 13 | 0 | 3 | 0 |
ManagedAreaName | 1211526 | 0.02 | 29 | 38 | 0 | 2 | 0 |
Activity.Type | 986843 | 0.20 | 5 | 35 | 0 | 12 | 0 |
RelativeDepth | 1135314 | 0.08 | 3 | 7 | 0 | 3 | 0 |
DetectionUnit | 1233311 | 0.00 | 4 | 4 | 0 | 1 | 0 |
Value.Qualifier | 788644 | 0.36 | 0 | 4 | 163070 | 93 | 0 |
Result.Comments | 987681 | 0.20 | 0 | 874 | 221332 | 768 | 9 |
SEACAR_QAQCFlagCode | 1211526 | 0.02 | 2 | 13 | 0 | 26 | 0 |
SEACAR_QAQC_Description | 1211526 | 0.02 | 31 | 195 | 0 | 26 | 0 |
ExportVersion | 1211526 | 0.02 | 23 | 23 | 0 | 3 | 0 |
Region | 1134858 | 0.08 | 2 | 19 | 0 | 22 | 0 |
DEP.Result.Unit | 79580 | 0.94 | 0 | 10 | 26408 | 20 | 0 |
program | 0 | 1.00 | 3 | 19 | 0 | 16 | 0 |
ProgramName | 1211526 | 0.02 | 25 | 36 | 0 | 2 | 0 |
ParameterName | 1212436 | 0.02 | 2 | 23 | 0 | 10 | 0 |
ParameterUnits | 1212436 | 0.02 | 3 | 9 | 0 | 6 | 0 |
ProgramLocationID | 1212436 | 0.02 | 1 | 2 | 0 | 55 | 0 |
ActivityType | 1212436 | 0.02 | 5 | 6 | 0 | 2 | 0 |
SampleDate | 1212436 | 0.02 | 23 | 23 | 0 | 2134 | 0 |
ResultComments | 1233311 | 0.00 | 21 | 21 | 0 | 1 | 0 |
SEACAR_EventID | 1212436 | 0.02 | 36 | 36 | 0 | 2394 | 0 |
data_source | 1207993 | 0.02 | 5 | 10 | 0 | 2 | 0 |
CLIENT.SAMPLE.ID | 1231034 | 0.00 | 1 | 3 | 0 | 70 | 0 |
MATRIX | 1231034 | 0.00 | 5 | 5 | 0 | 1 | 0 |
COLLECTED | 1231034 | 0.00 | 10 | 10 | 0 | 23 | 0 |
ANALYTE | 1231034 | 0.00 | 8 | 27 | 0 | 9 | 0 |
SAMPLE.RESULT | 1231380 | 0.00 | 3 | 6 | 0 | 1385 | 0 |
UNITS | 1231034 | 0.00 | 3 | 10 | 0 | 6 | 0 |
METHOD | 1231034 | 0.00 | 10 | 29 | 0 | 5 | 0 |
ANALYZED | 1231034 | 0.00 | 10 | 10 | 0 | 104 | 0 |
PREPARED | 1231034 | 0.00 | 10 | 10 | 0 | 95 | 0 |
source_file | 1231034 | 0.00 | 19 | 22 | 0 | 23 | 0 |
SAMPLE.COLLECTION.DATE | 1231034 | 0.00 | 9 | 10 | 0 | 23 | 0 |
BASIN | 912731 | 0.26 | 2 | 5 | 0 | 7 | 0 |
CLUSTER | 920123 | 0.26 | 2 | 4 | 0 | 37 | 0 |
ZSI | 921124 | 0.25 | 2 | 5 | 0 | 25 | 0 |
ZONE | 1149006 | 0.07 | 3 | 7 | 0 | 10 | 0 |
Date | 1232127 | 0.00 | 11 | 14 | 0 | 663 | 0 |
Organization.ID | 504158 | 0.59 | 7 | 16 | 0 | 10 | 0 |
SEGMENT | 1228085 | 0.01 | 2 | 5 | 0 | 8 | 0 |
DATE | 1228085 | 0.01 | 10 | 19 | 0 | 260 | 0 |
SITE | 1228085 | 0.01 | 8 | 28 | 0 | 52 | 0 |
Sampling.Agency.Name | 507508 | 0.59 | 14 | 58 | 0 | 9 | 0 |
Org.Latitude..DD.MM.SS.SSSS. | 987753 | 0.20 | 0 | 11 | 239409 | 22 | 0 |
Org.Longitude..DD.MM.SS.SSSS. | 987753 | 0.20 | 0 | 12 | 239409 | 22 | 0 |
WBID | 987753 | 0.20 | 0 | 6 | 27052 | 115 | 0 |
Sample.Collection.Type | 987753 | 0.20 | 0 | 22 | 8002 | 4 | 0 |
Activity.Depth.Unit | 987753 | 0.20 | 0 | 2 | 17317 | 3 | 0 |
DEP.Result.Value.Text | 987753 | 0.20 | 0 | 12 | 241502 | 2 | 0 |
Sample.Fraction | 987753 | 0.20 | 0 | 9 | 131120 | 3 | 0 |
Lab.ID | 987753 | 0.20 | 0 | 6 | 102313 | 10 | 0 |
source | 987753 | 0.20 | 3 | 9 | 0 | 6 | 0 |
Variable type: complex
skim_variable | n_missing | complete_rate | mean |
---|---|---|---|
STATION | 1228085 | 0.01 | 313.52+0.46i |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
nisk_start | 1235477 | 0 | NaN | : |
nisk_end | 1235477 | 0 | NaN | : |
ValueQualifierSource | 1235477 | 0 | NaN | : |
ActivityDepth_m | 1235477 | 0 | NaN | : |
ValueQualifier | 1235477 | 0 | NaN | : |
SampleFraction | 1235477 | 0 | NaN | : |
TIME | 1235477 | 0 | NaN | : |
DETECTION.LIMITS | 1235477 | 0 | NaN | : |
NO3.DL | 1235477 | 0 | NaN | : |
DIN.DL | 1235477 | 0 | NaN | : |
TON.DL | 1235477 | 0 | NaN | : |
APA.DL | 1235477 | 0 | NaN | : |
Activity.Top.Depth | 1235477 | 0 | NaN | : |
Activity.Bottom.Depth | 1235477 | 0 | NaN | : |
Activity.Depth.Top.Bottom.Unit | 1235477 | 0 | NaN | : |
Audit.Censored.Decisions | 1235477 | 0 | NaN | : |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
X | 0 | 1.00 | 9.026092e+05 | 490957.29 | 1.000000e+00 | 4.771610e+05 | 9.739350e+05 | 1.301861e+06 | 1.700207e+06 | ▆▅▆▇▆ |
year | 1159265 | 0.06 | 2.020430e+03 | 2.89 | 2.014000e+03 | 2.018000e+03 | 2.021000e+03 | 2.023000e+03 | 2.024000e+03 | ▃▃▅▆▇ |
month | 1159247 | 0.06 | 6.690000e+00 | 3.52 | 0.000000e+00 | 3.000000e+00 | 7.000000e+00 | 1.000000e+01 | 1.200000e+01 | ▅▅▇▅▇ |
day | 1159265 | 0.06 | 1.458000e+01 | 8.58 | 1.000000e+00 | 7.000000e+00 | 1.400000e+01 | 2.200000e+01 | 3.100000e+01 | ▇▆▆▆▃ |
lat_deg | 1159265 | 0.06 | 2.526000e+01 | 1.07 | 2.400000e+01 | 2.400000e+01 | 2.500000e+01 | 2.600000e+01 | 2.800000e+01 | ▅▇▃▃▁ |
lat_min | 1159265 | 0.06 | 3.030000e+01 | 15.98 | 3.000000e-02 | 2.041000e+01 | 3.311000e+01 | 4.278000e+01 | 5.983000e+01 | ▆▅▇▇▃ |
Org.Decimal.Latitude | 512995 | 0.58 | 2.558000e+01 | 0.57 | 2.440000e+01 | 2.516000e+01 | 2.555000e+01 | 2.587000e+01 | 2.878000e+01 | ▅▇▂▁▁ |
lon_deg | 1159265 | 0.06 | -8.131000e+01 | 0.92 | -8.500000e+01 | -8.200000e+01 | -8.100000e+01 | -8.100000e+01 | -8.000000e+01 | ▁▂▅▇▃ |
lon_min | 1159265 | 0.06 | 2.776000e+01 | 17.24 | 0.000000e+00 | 1.294000e+01 | 2.481000e+01 | 4.325000e+01 | 6.000000e+01 | ▇▇▆▅▆ |
Org.Decimal.Longitude | 512995 | 0.58 | -8.083000e+01 | 0.70 | -8.502000e+01 | -8.128000e+01 | -8.068000e+01 | -8.024000e+01 | -8.002000e+01 | ▁▁▁▃▇ |
Activity.Depth | 967966 | 0.22 | 4.800000e-01 | 0.20 | 0.000000e+00 | 5.000000e-01 | 5.000000e-01 | 5.000000e-01 | 1.000000e+00 | ▂▁▇▂▁ |
cast | 1159265 | 0.06 | 7.200000e-01 | 0.46 | 0.000000e+00 | 0.000000e+00 | 1.000000e+00 | 1.000000e+00 | 2.000000e+00 | ▃▁▇▁▁ |
DEP.Result.Value.Number | 78726 | 0.94 | 1.074000e+01 | 469.20 | -3.000000e-02 | 9.000000e-02 | 6.900000e-01 | 3.400000e+00 | 5.346500e+04 | ▇▁▁▁▁ |
RowID | 1212436 | 0.02 | 1.009723e+06 | 798651.12 | 4.862170e+05 | 5.143680e+05 | 5.349680e+05 | 1.551534e+06 | 3.870318e+06 | ▇▂▁▁▁ |
ProgramID | 1211526 | 0.02 | 4.057960e+03 | 0.19 | 4.057000e+03 | 4.058000e+03 | 4.058000e+03 | 4.058000e+03 | 4.058000e+03 | ▁▁▁▁▇ |
IndicatorID | 1211526 | 0.02 | 6.730000e+00 | 0.63 | 6.000000e+00 | 6.000000e+00 | 7.000000e+00 | 7.000000e+00 | 8.000000e+00 | ▆▁▇▁▂ |
ParameterID | 1211526 | 0.02 | 8.600000e+00 | 6.56 | 1.000000e+00 | 3.000000e+00 | 6.000000e+00 | 1.600000e+01 | 1.900000e+01 | ▇▃▁▂▅ |
AreaID | 1211526 | 0.02 | 6.090000e+00 | 0.89 | 6.000000e+00 | 6.000000e+00 | 6.000000e+00 | 6.000000e+00 | 1.500000e+01 | ▇▁▁▁▁ |
Year | 579870 | 0.53 | 2.011000e+03 | 9.94 | 1.989480e+03 | 2.001860e+03 | 2.016000e+03 | 2.020000e+03 | 2.024000e+03 | ▂▃▃▁▇ |
Month | 579852 | 0.53 | 6.570000e+00 | 3.45 | 0.000000e+00 | 4.000000e+00 | 7.000000e+00 | 1.000000e+01 | 1.200000e+01 | ▅▅▇▅▇ |
TotalDepth_m | 1235414 | 0.00 | 6.222000e+01 | 105.84 | 1.100000e+00 | 2.100000e+00 | 2.850000e+00 | 8.500000e+00 | 2.500000e+02 | ▇▁▁▁▂ |
MDL | 1096047 | 0.11 | 6.000000e-02 | 0.57 | 0.000000e+00 | 0.000000e+00 | 3.000000e-02 | 8.000000e-02 | 5.050000e+01 | ▇▁▁▁▁ |
PQL | 1096047 | 0.11 | 2.100000e-01 | 2.80 | 0.000000e+00 | 1.000000e-02 | 6.000000e-02 | 2.500000e-01 | 2.500000e+02 | ▇▁▁▁▁ |
Include | 1211526 | 0.02 | 1.000000e+00 | 0.03 | 0.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | ▁▁▁▁▇ |
MADup | 1211526 | 0.02 | 1.000000e+00 | 0.00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | ▁▁▇▁▁ |
ResultValue | 1212436 | 0.02 | 1.286000e+01 | 41.22 | 0.000000e+00 | 2.900000e-01 | 4.890000e+00 | 2.447000e+01 | 5.389000e+03 | ▇▁▁▁▁ |
OriginalLatitude | 1212436 | 0.02 | 2.580000e+01 | 0.02 | 2.577000e+01 | 2.578000e+01 | 2.579000e+01 | 2.581000e+01 | 2.587000e+01 | ▆▇▂▁▂ |
OriginalLongitude | 1212436 | 0.02 | -8.015000e+01 | 0.01 | -8.017000e+01 | -8.016000e+01 | -8.015000e+01 | -8.013000e+01 | -8.012000e+01 | ▇▆▅▅▇ |
LAB.SAMPLE.ID | 1231034 | 0.00 | 3.588884e+10 | 22277401.92 | 3.585419e+10 | 3.586685e+10 | 3.588708e+10 | 3.590707e+10 | 3.592746e+10 | ▇▆▆▆▇ |
REPORTING.LIMIT | 1233502 | 0.00 | 1.890000e+00 | 2.96 | 0.000000e+00 | 3.000000e-02 | 5.000000e-02 | 5.000000e-01 | 7.000000e+00 | ▇▁▁▁▂ |
DILUTION | 1231034 | 0.00 | 1.000000e+00 | 0.00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | ▁▁▇▁▁ |
DEP.Result.ID | 910067 | 0.26 | 8.241046e+06 | 5730392.45 | 1.180000e+02 | 4.799207e+06 | 8.409876e+06 | 1.312994e+07 | 1.787779e+07 | ▇▆▇▅▆ |
SURV | 912731 | 0.26 | 1.200000e+02 | 54.59 | -8.000000e+00 | 7.700000e+01 | 1.230000e+02 | 1.660000e+02 | 2.110000e+02 | ▂▆▇▇▇ |
STA | 920123 | 0.26 | 1.423400e+02 | 163.89 | 1.000000e+00 | 2.800000e+01 | 6.200000e+01 | 1.330000e+02 | 4.790000e+02 | ▇▂▁▁▂ |
YEAR | 920123 | 0.26 | 2.001260e+03 | 4.50 | 1.989480e+03 | 1.997790e+03 | 2.001560e+03 | 2.005030e+03 | 2.008730e+03 | ▁▅▇▇▇ |
NOX.DL | 920123 | 0.26 | 0.000000e+00 | 0.00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ▇▃▆▁▁ |
NO2.DL | 920123 | 0.26 | 0.000000e+00 | 0.00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ▆▇▁▁▁ |
NH4.DL | 920123 | 0.26 | 0.000000e+00 | 0.00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e-02 | ▇▁▁▁▂ |
TN.DL | 920123 | 0.26 | 3.000000e-02 | 0.03 | 0.000000e+00 | 0.000000e+00 | 3.000000e-02 | 5.000000e-02 | 8.000000e-02 | ▇▃▁▆▂ |
TP.DL | 920123 | 0.26 | 0.000000e+00 | 0.00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ▇▁▁▁▁ |
SRP.DL | 920123 | 0.26 | 0.000000e+00 | 0.00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | ▇▂▁▅▅ |
CHLA.DL | 920123 | 0.26 | 1.000000e-01 | 0.00 | 1.000000e-01 | 1.000000e-01 | 1.000000e-01 | 1.000000e-01 | 1.000000e-01 | ▁▁▇▁▁ |
TOC.DL | 920123 | 0.26 | 1.200000e-01 | 0.04 | 5.000000e-02 | 1.200000e-01 | 1.200000e-01 | 1.600000e-01 | 1.600000e-01 | ▅▁▁▇▆ |
SiO2.DL | 920123 | 0.26 | 0.000000e+00 | 0.00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 1.000000e-02 | ▇▁▁▁▁ |
Station | 1232127 | 0.00 | 2.894200e+02 | 80.47 | 2.000000e+02 | 2.310000e+02 | 2.700000e+02 | 3.140000e+02 | 5.090000e+02 | ▇▇▁▁▂ |
LATDEC | 1228085 | 0.01 | 2.470000e+01 | 0.14 | 2.455000e+01 | 2.462000e+01 | 2.466000e+01 | 2.477000e+01 | 2.548000e+01 | ▇▃▁▁▁ |
LONDEC | 1228085 | 0.01 | -8.147000e+01 | 0.39 | -8.259000e+01 | -8.172000e+01 | -8.163000e+01 | -8.134000e+01 | -8.012000e+01 | ▁▇▃▂▁ |
DEPTH | 1228589 | 0.01 | 9.700000e-01 | 0.11 | 0.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | ▁▁▁▁▇ |
Long.Deg | 1158689 | 0.06 | -7.603000e+01 | 25.15 | -8.100000e+01 | -8.000000e+01 | -8.000000e+01 | -8.000000e+01 | 8.000000e+01 | ▇▁▁▁▁ |
Long.Min | 1158689 | 0.06 | 3.489000e+01 | 16.20 | 0.000000e+00 | 1.956000e+01 | 3.748000e+01 | 4.760000e+01 | 6.575000e+01 | ▃▅▅▇▃ |
Lat.Deg | 1158689 | 0.06 | 2.479000e+01 | 0.40 | 2.400000e+01 | 2.500000e+01 | 2.500000e+01 | 2.500000e+01 | 2.500000e+01 | ▂▁▁▁▇ |
Lat.Min | 1158689 | 0.06 | 2.151000e+01 | 21.52 | 0.000000e+00 | 4.020000e+00 | 9.020000e+00 | 3.968000e+01 | 6.000000e+01 | ▇▁▂▁▃ |
Value.1 | 801057 | 0.35 | 1.250000e+01 | 63.97 | -7.730000e+01 | 3.160000e+00 | 7.810000e+00 | 2.418000e+01 | 4.136100e+04 | ▇▁▁▁▁ |
library(dplyr)
library(reshape2) # for melt()
library(ggplot2)
library(viridis)
library(RColorBrewer)# for scale_fill_distiller()
# 1. Extract & drop NA
vals_raw <- df$DEP.Result.Value.Number
vals_raw <- vals_raw[!is.na(vals_raw)]
# 2. Log-transform
v1 <- log10(vals_raw + 1)
# 3. Percentile of the log-values
pct1 <- ecdf(v1)(v1)
# 4. Grid dims
N <- length(pct1)
ncol <- ceiling(sqrt(N))
nrow <- ceiling(N / ncol)
# 5. Pad
pad_len <- (nrow * ncol) - N
p1_pad <- c(pct1, rep(NA, pad_len))
# 6. Matrix & melt
mat_p1 <- matrix(p1_pad, nrow = nrow, ncol = ncol, byrow = TRUE)
mat_long_p1 <- melt(mat_p1, varnames = c("row","col"), value.name = "pct_log")
# 7. Plot
ggplot(mat_long_p1, aes(x = col, y = row, fill = pct_log)) +
geom_tile(color = NA) +
scale_fill_distiller(
palette = "Spectral", # try "RdYlBu", "PuOr", "BrBG", etc.
direction = 1, # reverse=FALSE so low values start at red-ish end
na.value = "grey90", # color for the padded NA cells
guide = "none" # hide the legend; remove if you want a colorbar
) +
scale_y_reverse() +
theme_void() +
theme(legend.position = "none")
If you have visualization ideas for this data, please open a github issue here.