An Exception was encountered at 'In [3]'.

In this post we will look at the number of cases and number of deaths due to Covid-19 in England, and we will use these numbers to estimate a few things:

  • The approximate number of cases that actually occured during the first wave (Winter and Spring of 2020)
  • The mortality rate (or rather, the number of people dying vs the number of positive cases)
  • The number death rate for the next few weeks based on the number of new cases over the last couple of weeks.

Importing data

We will grab the data on the number of cases and deaths for each English region and also do some cleaning and feature engineering.

from uk_covid19 import Cov19API

import pandas as pd
import altair as alt
import numpy as np 

#collapse
filter_all_regions = [
    "areaType=region"
]
structure_deaths = {
    "date": "date",
    "areaName": "areaName",
    "newCases": "newCasesByPublishDate",
    "newDeaths": "newDeathsByDeathDate"
}

eng_deaths = Cov19API(filters=filter_all_regions, structure=structure_deaths).get_dataframe().fillna(0)

eng_deaths['date'] = pd.to_datetime(eng_deaths['date'], format='%Y-%m-%d')
eng_deaths.sort_values(['areaName', 'date'], inplace=True)
eng_deaths.reset_index(drop=True,inplace=True)

eng_deaths['weeklyDeaths'] = eng_deaths.groupby(by='areaName')['newDeaths'].rolling(7).sum().reset_index(drop=True).fillna(0)
eng_deaths['weeklyCases'] = eng_deaths.groupby(by='areaName')['newCases'].rolling(7).sum().reset_index(drop=True).fillna(0)
eng_deaths['mortalityEstimated'] = 100 *(eng_deaths.groupby(by='areaName')['weeklyDeaths'].shift(-14))/eng_deaths['weeklyCases']

Next we do the same for the whole of England.

filter_england = [
    "areaType=nation",
    "areaName=England"
]
full_eng_deaths = Cov19API(filters=filter_england, structure=structure_deaths).get_dataframe().fillna(0)

full_eng_deaths['date'] = pd.to_datetime(full_eng_deaths['date'], format='%Y-%m-%d')
full_eng_deaths.sort_values(['areaName', 'date'], inplace=True)
full_eng_deaths.reset_index(drop=True,inplace=True)
full_eng_deaths['newDeaths'].iloc[-1] = np.nan
full_eng_deaths['newDeaths'].iloc[-2] = np.nan
full_eng_deaths['newDeaths'].iloc[-3] = np.nan
full_eng_deaths['laggedNewDeaths'] = full_eng_deaths['newDeaths'].shift(-7)
full_eng_deaths['estimateCasesFromDeaths'] = full_eng_deaths['laggedNewDeaths'] * 50
full_eng_deaths['estimateDeathsFromCases'] = full_eng_deaths['newCases'] * 0.02
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/pandas/core/indexing.py:1637: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)

Plotting the data

We first start by looking at the number of deaths in each English region since the beginning of march, then estimate the mortality rate (rate of deaths per positive case) in each region. We then move on to look at the data for the whole of England.

Plotting: English regions

Execution using papermill encountered an exception here and stopped:

bars = alt.Chart(eng_deaths.query("date >= '2020-03-01'")).mark_bar().encode(
    x=alt.X("yearmonthdate(date):T", axis=alt.Axis(title='Date')),
    y=alt.Y("weeklyDeaths:Q", axis=alt.Axis(title='Weekly number of deaths')),
    tooltip="newDeaths:Q"
).properties(width=700)

bars.facet(alt.Column('areaName', title='Region'), columns=1).properties(title='Weekly number of deaths in each region')
---------------------------------------------------------------------------
MaxRowsError                              Traceback (most recent call last)
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/altair/vegalite/v4/api.py in to_dict(self, *args, **kwargs)
    361         copy = self.copy(deep=False)
    362         original_data = getattr(copy, "data", Undefined)
--> 363         copy.data = _prepare_data(original_data, context)
    364 
    365         if original_data is not Undefined:

/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/altair/vegalite/v4/api.py in _prepare_data(data, context)
     82     # convert dataframes  or objects with __geo_interface__ to dict
     83     if isinstance(data, pd.DataFrame) or hasattr(data, "__geo_interface__"):
---> 84         data = _pipe(data, data_transformers.get())
     85 
     86     # convert string input to a URLData

/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/toolz/functoolz.py in pipe(data, *funcs)
    625     """
    626     for func in funcs:
--> 627         data = func(data)
    628     return data
    629 

/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/toolz/functoolz.py in __call__(self, *args, **kwargs)
    301     def __call__(self, *args, **kwargs):
    302         try:
--> 303             return self._partial(*args, **kwargs)
    304         except TypeError as exc:
    305             if self._should_curry(args, kwargs, exc):

/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/altair/vegalite/data.py in default_data_transformer(data, max_rows)
     17 @curried.curry
     18 def default_data_transformer(data, max_rows=5000):
---> 19     return curried.pipe(data, limit_rows(max_rows=max_rows), to_values)
     20 
     21 

/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/toolz/functoolz.py in pipe(data, *funcs)
    625     """
    626     for func in funcs:
--> 627         data = func(data)
    628     return data
    629 

/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/toolz/functoolz.py in __call__(self, *args, **kwargs)
    301     def __call__(self, *args, **kwargs):
    302         try:
--> 303             return self._partial(*args, **kwargs)
    304         except TypeError as exc:
    305             if self._should_curry(args, kwargs, exc):

/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/altair/utils/data.py in limit_rows(data, max_rows)
     82             "than the maximum allowed ({}). "
     83             "For information on how to plot larger datasets "
---> 84             "in Altair, see the documentation".format(max_rows)
     85         )
     86     return data

MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation
alt.FacetChart(...)

bars = alt.Chart(eng_deaths.query("date >= '2020-07-07'")).mark_bar().encode(
    x=alt.X("yearmonthdate(date):T", axis=alt.Axis(title='Date')),
    y=alt.Y("mortalityEstimated:Q", axis=alt.Axis(title='Implied estimated mortality')),
    tooltip="mortalityEstimated:Q"
).properties(width=800)

bars.facet(alt.Column('areaName', title='Region'), columns=1).properties(title='Number of deaths as a percentage of number of cases')

For each region in England, we plot the ration of new deaths (lagged by 7 days) over the number of new cases as a percentage. We only look at the dates in the second half of 2020 because the lack of testing capacity skewed the numbers too much (the mortality rate is certainly not above 10%...). Note that we still see an overestimate of the mortality (in particular in the North East and the South East) over the summer, likely due to low levels of testing capacity. In the end, the implied mortality rate seems to have settled down to around 2%.

Plotting: All of England

The first chart we look at has both the number of new cases and the number of new deaths in England. The blue line corresponds to the axis on the right hand side, the number of new deaths. The green bars are the number of new cases (axis on the left hand side).

base = alt.Chart(full_eng_deaths.query("date >= '2020-03-01'")).encode(x=alt.X("yearmonthdate(date):T", axis=alt.Axis(title=None))).properties(title='Number of new cases and the number of new deaths in England',width=700)

bars = base.mark_bar(color='#57A44C').encode(
    y=alt.Y("newCases:Q", axis=alt.Axis(title='Number of new cases', titleColor='#57A44C')),
    tooltip="newCases:Q"
)

line = base.mark_line(stroke='#5276A7').encode(y=alt.Y("newDeaths:Q", axis=alt.Axis(title='Number of new deaths', titleColor='#5276A7')))

alt.layer(bars, line).resolve_scale(y='independent')

base = alt.Chart(full_eng_deaths.query("date >= '2020-03-01'")).encode(x=alt.X("yearmonthdate(date):T", axis=alt.Axis(title=None))).properties(width=700, title='Estimating the number of deaths from the number of cases vs actual number of deaths')

bars = base.mark_bar(color='#57A44C').encode(
    y=alt.Y("estimateDeathsFromCases:Q", axis=alt.Axis(title='Estimated number of deaths', titleColor='#57A44C')),
    tooltip="newCases:Q"
)

line = base.mark_line(stroke='#5276A7').encode(y=alt.Y("laggedNewDeaths:Q", axis=alt.Axis(title='Actual number of new deaths', titleColor='#5276A7')))

alt.layer(bars, line).resolve_scale(y='independent')