case_fatality_rate added

This commit is contained in:
Rory Healy 2021-04-10 14:59:49 +10:00
parent 8f7e856039
commit 4a04643bc7
3 changed files with 2084 additions and 74006 deletions

73994
output.csv

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -24,13 +24,8 @@ new_deaths_grouped = pd.DataFrame(new_deaths_grouped.to_records())
new_cases_grouped.sort_values(by = ['location', 'date'], inplace = True)
new_deaths_grouped.sort_values(by = ['location', 'date'], inplace = True)
# rename columns
new_cases_grouped.rename(columns = {'date': 'month'}, inplace = True)
new_deaths_grouped.rename(columns = {'date': 'month'}, inplace = True)
# merge new_deaths and new_cases
new_cases_grouped = new_cases_grouped.reindex(columns = ['location', 'month', 'new_cases'])
aggregated_data = new_cases_grouped.merge(new_deaths_grouped, how = 'outer', left_on = ['location', 'month'], right_on = ['location', 'month'])
aggregated_data = new_cases_grouped.merge(new_deaths_grouped, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
# filter out all entries that aren't at the end of the month
all_covid_data['end_of_month'] = pd.to_datetime(all_covid_data['date']).dt.is_month_end
@ -39,15 +34,19 @@ all_covid_data = all_covid_data.loc[all_covid_data.end_of_month, :]
# extract monthly total cases and total deaths
total_cases = all_covid_data.loc[:, ['location', 'date', 'total_cases']]
total_cases.date = total_cases.date.dt.month
total_cases.rename(columns = {'date': 'month'}, inplace = True)
total_deaths = all_covid_data.loc[:, ['location', 'date', 'total_deaths']]
total_deaths.date = total_deaths.date.dt.month
total_deaths.rename(columns = {'date': 'month'}, inplace = True)
# merge total_deaths and total_cases into aggregated_data
aggregated_data = aggregated_data.merge(total_cases, how = 'outer', left_on = ['location', 'month'], right_on = ['location', 'month'])
aggregated_data = aggregated_data.merge(total_deaths, how = 'outer', left_on = ['location', 'month'], right_on = ['location', 'month'])
aggregated_data = aggregated_data.reindex(columns = ['location', 'month', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths'])
aggregated_data = aggregated_data.merge(total_cases, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
aggregated_data = aggregated_data.merge(total_deaths, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
aggregated_data.rename(columns = {'date': 'month'}, inplace = True)
print(aggregated_data.head(25))
# compute the monthly case fatality rate as new_deaths / new_cases (NaN or inf where new_cases is 0 or missing)
aggregated_data['case_fatality_rate'] = (aggregated_data['new_deaths'] / aggregated_data['new_cases'])
aggregated_data = aggregated_data.reindex(columns = ['location', 'month', 'case_fatality_rate', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths'])
# output results to csv and stdout
print(aggregated_data.head(5))
aggregated_data.to_csv('owid-covid-data-2020-monthly.csv')