diff --git a/dashboard/about.md b/dashboard/about.md index 4461521..636edf6 100644 --- a/dashboard/about.md +++ b/dashboard/about.md @@ -40,7 +40,9 @@ The Forecaster Evaluation Dashboard is a collaborative project, which has been m #### **Sources** -**Observed values** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. +**Observed cases and deaths** are from the [COVID-19 Data Repository](https://github.com/CSSEGISandData/COVID-19) by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. + +**Observed hospitalizations** are from the U.S. Department of Health & Human Services and is the sum of all adult and pediatric COVID-19 hospital admissions. **Forecaster predictions** are drawn from the [COVID-19 Forecast Hub GitHub repository](https://github.com/reichlab/covid19-forecast-hub/) @@ -61,16 +63,23 @@ Data for the dashboard is pulled from these sources on Mondays and Tuesdays. #### **Dashboard Inclusion Criteria** A forecast is only included if all the following criteria are met: -* The target variable is the weekly incidence of either cases or deaths +* The target variable is the weekly incidence of either cases or deaths, or the daily incidence of hospitalizations * The horizon is no more than 4 weeks ahead * The location is a U.S. state, territory, or the nation as a whole * All dates are parsable. If a date is not in yyyy/mm/dd format, the forecast may be dropped. * The forecast was made on or before the Monday of the relevant week. If multiple versions of a forecast are submitted then only the last forecast that meets the date restriction is included. +#### **How Hospitalization Forecasts are Processed** +Though hospitalizations are forecasted on a daily basis, in keeping with the cases and death scoring and plotting, we show the hospitalization scores on a weekly basis in the dashboard. We only look at forecasts for one target day a week (currently Wednesdays), and calculate the weekly horizons accordingly. Hospitalization horizons are calculated in the following manner: +* 2 days ahead: Forecast date is on or before the Monday preceeding the target date (Wednesday) +* 9 days ahead: Forecast date equal to or before 7 days before the Monday preceeding the target date +* 16 days ahead: Forecast date is equal to or before 14 days before the Monday preceeding the target date +* 23 days ahead: Forecast date equal to or before 21 days before the Monday preceeding the target date + #### **Notes on the Data** * If a forecast does not include an explicit point estimate, the 0.5 quantile is taken as the point estimate for calculating absolute error. -* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and 15 for cases) +* WIS is only shown for forecasts that have predictions for all quantiles (23 quantiles for deaths and hospitalizations and 7 for cases) * Totaling over all states and territories does not include nationwide forecasts. To ensure that values are comparable, these totals also exclude any locations that are absent from any file that was submitted by one of the selected forecasters. * We include revisions of observed values, which means that the scores for forecasts made in the past can change as our understanding of the ground truth changes. @@ -86,5 +95,7 @@ The available files are: * score_cards_nation_deaths.rds * score_cards_state_cases.rds * score_cards_state_deaths.rds +* score_cards_state_hospitalizations.rds +* score_cards_nation_hospitalizations.rds diff --git a/dashboard/app.R b/dashboard/app.R index 72516f4..f94fa1a 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -12,7 +12,17 @@ library(aws.s3) COVERAGE_INTERVALS = c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98") DEATH_FILTER = "deaths_incidence_num" CASE_FILTER = "confirmed_incidence_num" +HOSPITALIZATIONS_FILTER = "confirmed_admissions_covid_1d" +HOSPITALIZATIONS_TARGET_DAY = "Wednesday" TOTAL_LOCATIONS = "Totaled Over States*" +AHEAD_OPTIONS = c(1,2,3,4) + +# Num days to offset the forecast week by +# Example: if HOSPITALIZATIONS_TARGET_DAY is Wednesday and HOSPITALIZATIONS_OFFSET is 2, +# ahead 1 has to have forecast date of Monday or earlier, +# ahead 2 has to have forecast date of Monday + 7 days or earlier (offset + 7 days or more), etc +HOSPITALIZATIONS_OFFSET = 2 +HOSPITALIZATIONS_AHEAD_OPTIONS = c(HOSPITALIZATIONS_OFFSET, HOSPITALIZATIONS_OFFSET + 7, HOSPITALIZATIONS_OFFSET + 14, HOSPITALIZATIONS_OFFSET + 21) # Score explanations wisExplanation = includeMarkdown("wis.md") @@ -64,9 +74,8 @@ ui <- fluidPage(padding=0, conditionalPanel(condition = "input.tabset == 'evaluations'", radioButtons("targetVariable", "Target Variable", choices = list("Incident Deaths" = "Deaths", - "Incident Cases" = "Cases")), - - + "Incident Cases" = "Cases", + "Hospital Admissions" = "Hospitalizations")), radioButtons("scoreType", "Scoring Metric", choices = list("Weighted Interval Score" = "wis", "Spread" = "sharpness", @@ -78,13 +87,13 @@ ui <- fluidPage(padding=0, "logScale", "Log Scale", value = FALSE, - ), + )), + conditionalPanel(condition = "input.scoreType != 'coverage' && input.targetVariable != 'Hospitalizations'", checkboxInput( "scaleByBaseline", "Scale by Baseline Forecaster", value = FALSE, - ) - ), + )), selectInput( "forecasters", p("Forecasters", tags$br(), tags$span(id="forecaster-input", "Type a name or select from dropdown")), @@ -96,8 +105,8 @@ ui <- fluidPage(padding=0, checkboxGroupInput( "aheads", "Forecast Horizon (Weeks)", - choices = c(1,2,3,4), - selected = 1, + choices = AHEAD_OPTIONS, + selected = AHEAD_OPTIONS[1], inline = TRUE ), conditionalPanel(condition = "input.scoreType == 'coverage'", @@ -225,7 +234,9 @@ server <- function(input, output, session) { dfStateDeaths <- getData("score_cards_state_deaths.rds") dfNationCases = getData("score_cards_nation_cases.rds") dfNationDeaths = getData("score_cards_nation_deaths.rds") - + dfStateHospitalizations = getData("score_cards_state_hospitalizations.rds") + dfNationHospitalizations = getData("score_cards_nation_hospitalizations.rds") + # Pick out expected columns only covCols = paste0("cov_", COVERAGE_INTERVALS) expectedCols = c("ahead", "geo_value", "forecaster", "forecast_date", @@ -237,8 +248,10 @@ server <- function(input, output, session) { dfStateDeaths = dfStateDeaths %>% select(all_of(expectedCols)) dfNationCases = dfNationCases %>% select(all_of(expectedCols)) dfNationDeaths = dfNationDeaths %>% select(all_of(expectedCols)) + dfStateHospitalizations = dfStateHospitalizations %>% select(all_of(expectedCols)) + dfNationHospitalizations = dfNationHospitalizations %>% select(all_of(expectedCols)) - df <- rbind(dfStateCases, dfStateDeaths, dfNationCases, dfNationDeaths) + df <- rbind(dfStateCases, dfStateDeaths, dfNationCases, dfNationDeaths, dfStateHospitalizations, dfNationHospitalizations) df <- df %>% rename("10" = cov_10, "20" = cov_20, "30" = cov_30, "40" = cov_40, "50" = cov_50, "60" = cov_60, "70" = cov_70, "80" = cov_80, "90" = cov_90, "95" = cov_95, "98" = cov_98) # Prepare color palette @@ -262,14 +275,19 @@ server <- function(input, output, session) { if (targetVariable == "Deaths") { signalFilter = DEATH_FILTER } + if (targetVariable == "Hospitalizations") { + signalFilter = HOSPITALIZATIONS_FILTER + } scoreDf = scoreDf %>% filter(signal == signalFilter) %>% - filter(ahead %in% horizon) %>% filter(forecaster %in% forecasters) - + if (signalFilter == HOSPITALIZATIONS_FILTER) { + scoreDf = filterHospitalizationsAheads(scoreDf) + } + scoreDf = scoreDf %>% filter(ahead %in% horizon) filteredScoreDf <- scoreDf %>% rename(Forecaster = forecaster, Forecast_Date = forecast_date, Week_End_Date = target_end_date) - + if (scoreType == "wis" || scoreType == "sharpness") { # Only show WIS or Sharpness for forecasts that have all intervals filteredScoreDf = filteredScoreDf %>% filter(!is.na(`50`)) %>% filter(!is.na(`80`)) %>% filter(!is.na(`95`)) @@ -306,6 +324,7 @@ server <- function(input, output, session) { # Create df with col for all locations across each unique date, ahead and forecaster combo locationDf = filteredScoreDf %>% group_by(Forecaster, Week_End_Date, ahead) %>% summarize(location_list = paste(sort(unique(geo_value)),collapse=",")) + locationDf = locationDf %>% filter(location_list != c('us')) # Create a list containing each row's location list locationList = sapply(locationDf$location_list, function(x) strsplit(x, ",")) locationList = lapply(locationList, function(x) x[x != 'us']) @@ -328,7 +347,7 @@ server <- function(input, output, session) { output$renderAggregateText = renderText(paste(aggregateText, " Locations included: ")) } if (length(locationsIntersect) == 0) { - output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common.") + output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common on all dates.") output$renderLocations <- renderText("") output$renderAggregateText = renderText("") hideElement("truthPlot") @@ -387,9 +406,16 @@ server <- function(input, output, session) { as_tsibble(key = c(Forecaster, ahead), index = Week_End_Date) %>% group_by(Forecaster, Forecast_Date, ahead) %>% fill_gaps(.full = TRUE) - - filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = c(1, 2, 3, 4), - labels = c("Horizon: 1 Week", "Horizon: 2 Weeks", "Horizon: 3 Weeks", "Horizon: 4 Weeks")) + # Set labels for faceted horizon plots + horizonOptions = AHEAD_OPTIONS + horizonLabels = lapply(AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Week(s)")) + if (targetVariable == 'Hospitalizations') { + horizonOptions = HOSPITALIZATIONS_AHEAD_OPTIONS + horizonLabels = lapply(HOSPITALIZATIONS_AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Days")) + } + filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = horizonOptions, + labels = horizonLabels) + # Set forecaster colors for plot set.seed(colorSeed) forecasterRand <- sample(unique(df$forecaster)) colorPalette = setNames(object = viridis(length(unique(df$forecaster))), nm = forecasterRand) @@ -439,10 +465,14 @@ server <- function(input, output, session) { ################### # Create the plot for target variable ground truth truthPlot = function(scoreDf = NULL, targetVariable = NULL, locationsIntersect = NULL, allLocations = FALSE) { - titleText = paste0('Observed Incident ', targetVariable, '') + observation = paste0('Incident ', targetVariable) + if (targetVariable == "Hospitalizations") { + observation = paste0('Hospital Admissions') + } + titleText = paste0('Observed ', observation, '') if (allLocations) { - titleText = paste0('Observed Incident ', targetVariable, '', '
Totaled over all states and territories common to selected forecasters*') - } + titleText = paste0('Observed ', observation, '', '
Totaled over all states and territories common to selected forecasters*') + } scoreDf <- scoreDf %>% group_by(Week_End_Date) %>% summarize(Reported_Incidence = actual) @@ -480,9 +510,13 @@ server <- function(input, output, session) { observeEvent(input$targetVariable, { if (input$targetVariable == 'Deaths') { df = df %>% filter(signal == DEATH_FILTER) - } else { + } else if (input$targetVariable == 'Cases') { df = df %>% filter(signal == CASE_FILTER) + } else { + df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } + + updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, TRUE) updateForecasterChoices(session, df, input$forecasters, input$scoreType) updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) @@ -491,8 +525,10 @@ server <- function(input, output, session) { observeEvent(input$scoreType, { if (input$targetVariable == 'Deaths') { df = df %>% filter(signal == DEATH_FILTER) - } else { + } else if (input$targetVariable == 'Cases') { df = df %>% filter(signal == CASE_FILTER) + } else { + df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } # Only show forecasters that have data for the score chosen updateForecasterChoices(session, df, input$forecasters, input$scoreType) @@ -527,21 +563,14 @@ server <- function(input, output, session) { observeEvent(input$forecasters, { if (input$targetVariable == 'Deaths') { df = df %>% filter(signal == DEATH_FILTER) - } else { + } else if (input$targetVariable == 'Cases') { df = df %>% filter(signal == CASE_FILTER) - } - df = df %>% filter(forecaster %in% input$forecasters) - aheadChoices = unique(df$ahead) - # Ensure previsouly selected options are still allowed - if (input$aheads %in% aheadChoices) { - selectedAheads = input$aheads } else { - selectedAheads = 1 + df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) } - updateCheckboxGroupInput(session, "aheads", - choices = aheadChoices, - selected = selectedAheads, - inline = TRUE) + df = df %>% filter(forecaster %in% input$forecasters) + + updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, FALSE) updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) }) @@ -550,13 +579,18 @@ server <- function(input, output, session) { observe({ # Ensure there is always one ahead selected if(length(input$aheads) < 1) { - updateCheckboxGroupInput(session, "aheads", - selected = 1) + if (input$targetVariable == 'Hospitalizations') { + updateCheckboxGroupInput(session, "aheads", + selected = HOSPITALIZATIONS_AHEAD_OPTIONS[1]) + } else { + updateCheckboxGroupInput(session, "aheads", + selected = AHEAD_OPTIONS[1]) + } } # Ensure there is always one forecaster selected if(length(input$forecasters) < 1) { updateSelectInput(session, "forecasters", - selected = c("COVIDhub-baseline")) + selected = c("COVIDhub-ensemble")) # Use ensemble rather than baseline bc it has hospitalization scores } # Ensure COVIDhub-baseline is selected when scaling by baseline if(input$scaleByBaseline && !("COVIDhub-baseline" %in% input$forecasters)) { @@ -616,4 +650,52 @@ updateLocationChoices = function(session, df, targetVariable, forecasterChoices, selected = selectedLocation) } +updateAheadChoices = function(session, df, targetVariable, forecasterChoices, aheads, targetVariableChange) { + df = df %>% filter(forecaster %in% forecasterChoices) + aheadOptions = AHEAD_OPTIONS + title = "Forecast Horizon (Weeks)" + if (targetVariable == 'Hospitalizations') { + aheadOptions = HOSPITALIZATIONS_AHEAD_OPTIONS + title = "Forecast Horizon (Days)" + } + aheadChoices = Filter(function(x) any(unique(df$ahead) %in% x), aheadOptions) + # Ensure previsouly selected options are still allowed + if (!is.null(aheads) && aheads %in% aheadChoices) { + selectedAheads = aheads + } else { + selectedAheads = aheadOptions[1] + } + # If we are changing target variable, always reset ahead selection to first option + if (targetVariableChange) { + selectedAheads = aheadOptions[1] + } + updateCheckboxGroupInput(session, "aheads", + title, + choices = aheadChoices, + selected = selectedAheads, + inline = TRUE) +} + +# Only use weekly aheads for hospitalizations +# May change in the future +filterHospitalizationsAheads = function(scoreDf) { + scoreDf['weekday'] = weekdays(as.Date(scoreDf$target_end_date)) + scoreDf = scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY) + + oneAheadDf = scoreDf %>% filter(ahead >= HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1]) + twoAheadDf = scoreDf %>% filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2]) + threeAheadDf = scoreDf %>% filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3]) + fourAheadDf = scoreDf %>% filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4]) + + return(rbind(oneAheadDf, twoAheadDf, threeAheadDf, fourAheadDf)) +} + shinyApp(ui = ui, server = server)