Added argument parsing for the output CSV path
This commit is contained in:
parent
4a04643bc7
commit
1a03366fb9
2 changed files with 2085 additions and 2078 deletions
File diff suppressed because it is too large
Load diff
17
parta1.py
17
parta1.py
|
@ -1,6 +1,11 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
# parse command-line arguments (path_to_csv is where the output csv is written)
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument('path_to_csv', help = 'path to the csv file')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
all_covid_data = pd.read_csv('data/owid-covid-data.csv', encoding = 'ISO-8859-1')
|
all_covid_data = pd.read_csv('data/owid-covid-data.csv', encoding = 'ISO-8859-1')
|
||||||
|
|
||||||
# filter out data past 2020
|
# filter out data past 2020
|
||||||
|
@ -24,7 +29,7 @@ new_deaths_grouped = pd.DataFrame(new_deaths_grouped.to_records())
|
||||||
new_cases_grouped.sort_values(by = ['location', 'date'], inplace = True)
|
new_cases_grouped.sort_values(by = ['location', 'date'], inplace = True)
|
||||||
new_deaths_grouped.sort_values(by = ['location', 'date'], inplace = True)
|
new_deaths_grouped.sort_values(by = ['location', 'date'], inplace = True)
|
||||||
|
|
||||||
# merge new_deaths and new_cases
|
# merge new_deaths_grouped and new_cases_grouped
|
||||||
aggregated_data = new_cases_grouped.merge(new_deaths_grouped, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
|
aggregated_data = new_cases_grouped.merge(new_deaths_grouped, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
|
||||||
|
|
||||||
# filter out all entries that aren't at the end of the month
|
# filter out all entries that aren't at the end of the month
|
||||||
|
@ -41,12 +46,14 @@ total_deaths.date = total_deaths.date.dt.month
|
||||||
# merge total_deaths and total_cases into aggregated_data
|
# merge total_deaths and total_cases into aggregated_data
|
||||||
aggregated_data = aggregated_data.merge(total_cases, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
|
aggregated_data = aggregated_data.merge(total_cases, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
|
||||||
aggregated_data = aggregated_data.merge(total_deaths, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
|
aggregated_data = aggregated_data.merge(total_deaths, how = 'outer', left_on = ['location', 'date'], right_on = ['location', 'date'])
|
||||||
aggregated_data.rename(columns = {'date': 'month'}, inplace = True)
|
|
||||||
|
|
||||||
# compute case fatality rate for each month
|
# compute case fatality rate for each month
|
||||||
aggregated_data['case_fatality_rate'] = (aggregated_data['new_deaths'] / aggregated_data['new_cases'])
|
aggregated_data['case_fatality_rate'] = (aggregated_data['new_deaths'] / aggregated_data['new_cases'])
|
||||||
aggregated_data = aggregated_data.reindex(columns = ['location', 'month', 'case_fatality_rate', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths'])
|
|
||||||
|
|
||||||
# output results to csv and stdout
|
# format aggregated_data and output results
|
||||||
|
aggregated_data = aggregated_data.reindex(columns = ['location', 'date', 'case_fatality_rate', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths'])
|
||||||
|
aggregated_data.rename(columns = {'date': 'month'}, inplace = True)
|
||||||
|
aggregated_data.set_index(['location', 'month'], inplace = True)
|
||||||
|
|
||||||
print(aggregated_data.head(5))
|
print(aggregated_data.head(5))
|
||||||
aggregated_data.to_csv('owid-covid-data-2020-monthly.csv')
|
aggregated_data.to_csv(args.path_to_csv)
|
||||||
|
|
Loading…
Reference in a new issue