Deaths in England due to Covid19
Exploring the number of deaths due to Covid19 in England using Pandas and Altair for graphics
In this post we will look at the number of cases and number of deaths due to Covid-19 in England, and we will use these numbers to estimate a few things:
- The approximate number of cases that actually occurred during the first wave (Winter and Spring of 2020)
- The mortality rate (or rather, the number of people dying vs the number of positive cases)
- The expected number of deaths over the next few weeks, based on the number of new cases over the last couple of weeks.
from uk_covid19 import Cov19API
import pandas as pd
import altair as alt
import numpy as np
#collapse
# Filters handed to Cov19API: restrict the query to areas of type "region".
filter_all_regions = [
    "areaType=region",
]

# Output column name -> API metric name requested from Cov19API.
# Cases are requested by publish date, deaths by date of death.
structure_deaths = {
    "date": "date",
    "areaName": "areaName",
    "newCases": "newCasesByPublishDate",
    "newDeaths": "newDeathsByDeathDate",
}

# Download the regional figures; missing values are treated as zero counts.
eng_deaths = (
    Cov19API(filters=filter_all_regions, structure=structure_deaths)
    .get_dataframe()
    .fillna(0)
)
eng_deaths['date'] = pd.to_datetime(eng_deaths['date'], format='%Y-%m-%d')

# Order rows chronologically within each region so the rolling windows and
# the shift below walk forward in time.
eng_deaths.sort_values(['areaName', 'date'], inplace=True)
eng_deaths.reset_index(drop=True, inplace=True)

# Trailing 7-day totals per region. Only the group level of the resulting
# MultiIndex is dropped, so the assignment aligns on the row index rather
# than on positional order. The first six rows of every region have an
# incomplete window and are reported as 0.
for daily_col, weekly_col in (('newDeaths', 'weeklyDeaths'), ('newCases', 'weeklyCases')):
    eng_deaths[weekly_col] = (
        eng_deaths.groupby(by='areaName')[daily_col]
        .rolling(7)
        .sum()
        .reset_index(level=0, drop=True)
        .fillna(0)
    )

# Deaths in the 7-day window ending 14 days later, as a percentage of the
# cases in the current 7-day window. The shift is done per region so values
# never leak from one region into the next.
deaths_two_weeks_later = eng_deaths.groupby(by='areaName')['weeklyDeaths'].shift(-14)
# A zero case count gives no meaningful ratio: mask it so the result is NaN
# instead of an infinite percentage.
weekly_cases_nonzero = eng_deaths['weeklyCases'].replace(0, np.nan)
eng_deaths['mortalityEstimated'] = 100 * deaths_two_weeks_later / weekly_cases_nonzero
Next we do the same for the whole of England.
# Filters handed to Cov19API: only the nation-level records named "England".
filter_england = [
    "areaType=nation",
    "areaName=England",
]

# Same download and clean-up steps as for the regions, for England as a whole.
full_eng_deaths = (
    Cov19API(filters=filter_england, structure=structure_deaths)
    .get_dataframe()
    .fillna(0)
)
full_eng_deaths['date'] = pd.to_datetime(full_eng_deaths['date'], format='%Y-%m-%d')
full_eng_deaths.sort_values(['areaName', 'date'], inplace=True)
full_eng_deaths.reset_index(drop=True, inplace=True)

# Blank out the deaths for the three most recent dates
# (NOTE(review): presumably because the latest by-death-date figures are
# still incomplete - confirm).
# A single .loc write is used instead of chained `df[col].iloc[i] = ...`,
# which may modify a temporary copy and is a no-op under pandas
# Copy-on-Write. The column is cast to float first so NaN can be stored
# even if the API returned integer counts.
full_eng_deaths['newDeaths'] = full_eng_deaths['newDeaths'].astype(float)
full_eng_deaths.loc[full_eng_deaths.index[-3:], 'newDeaths'] = np.nan

# Deaths recorded 7 days after each date, placed on that date's row.
full_eng_deaths['laggedNewDeaths'] = full_eng_deaths['newDeaths'].shift(-7)
# 50 cases per death (presumably the inverse of the 2% ratio used below).
full_eng_deaths['estimateCasesFromDeaths'] = full_eng_deaths['laggedNewDeaths'] * 50
# 2% of reported cases.
full_eng_deaths['estimateDeathsFromCases'] = full_eng_deaths['newCases'] * 0.02
# Bar chart of the rolling weekly death totals from 1 March 2020 onwards;
# hovering a bar shows that day's newDeaths value.
deaths_since_march = eng_deaths.query("date >= '2020-03-01'")
weekly_deaths_encoding = {
    "x": alt.X("yearmonthdate(date):T", axis=alt.Axis(title='Date')),
    "y": alt.Y("weeklyDeaths:Q", axis=alt.Axis(title='Weekly number of deaths')),
    "tooltip": "newDeaths:Q",
}
bars = (
    alt.Chart(deaths_since_march)
    .mark_bar()
    .encode(**weekly_deaths_encoding)
    .properties(width=700)
)

# One panel per region, stacked in a single column.
bars.facet(
    alt.Column('areaName', title='Region'),
    columns=1,
).properties(title='Weekly number of deaths in each region')
# Bar chart of the implied mortality percentage from 7 July 2020 onwards;
# hovering a bar shows the plotted value.
mortality_since_july = eng_deaths.query("date >= '2020-07-07'")
mortality_encoding = {
    "x": alt.X("yearmonthdate(date):T", axis=alt.Axis(title='Date')),
    "y": alt.Y("mortalityEstimated:Q", axis=alt.Axis(title='Implied estimated mortality')),
    "tooltip": "mortalityEstimated:Q",
}
bars = (
    alt.Chart(mortality_since_july)
    .mark_bar()
    .encode(**mortality_encoding)
    .properties(width=800)
)

# One panel per region, stacked in a single column.
bars.facet(
    alt.Column('areaName', title='Region'),
    columns=1,
).properties(title='Number of deaths as a percentage of number of cases')