-
Notifications
You must be signed in to change notification settings - Fork 0
/
county_stats.py
28 lines (22 loc) · 1.15 KB
/
county_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import numpy as np
import pandas as pd
def calculate_stats():
df = pd.read_csv('./county.csv')
# convert string to numbers
df['med_household_income_2017'] = df['med_household_income_2017'].str.replace('$','')
string_columns = ['high_school', 'clinton', 'trump', 'med_household_income_2017']
for column in string_columns:
df[column] = pd.to_numeric(df[column].str.replace(',',''))
df['high_school_degree_percent'] = (1 - (df['high_school'] / df['totalPop'])).multiply(100).round(1)
# 0 for Democrat and 1 for Republican
df['politics'] = (df['clinton'] / (df['clinton'] + df['trump'])).round(2)
df['employment_rate_2017'] = (df['employed_2017'] / df['labor_total_2017']).multiply(100).round(1)
df_stats = df[['id', 'county', 'rural_urban', 'urban_influence', 'high_school_degree_percent',
'politics', 'percent_pov_all', 'percent_pov_child', 'employment_rate_2017',
'med_household_income_2017', 'medHHIncome_percent_state_2017',
'white', 'whiteAlone']]
df_stats.to_csv('county_stats.csv')
def main():
calculate_stats()
if __name__ == "__main__":
main()