# 政治献金案例

#US 大选的数据分析

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Month abbreviation -> month number (JAN=1 ... DEC=12), built by enumerating
# the abbreviations in calendar order.
months = {
    abbreviation: number
    for number, abbreviation in enumerate(
        'JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC'.split(), start=1
    )
}

# Candidate names singled out for closer inspection.
of_interest = [
    'Obama, Barack',
    'Romney, Mitt',
    'Santorum, Rick',
    'Paul, Ron',
    'Gingrich, Newt',
]

# Candidate name -> party affiliation.
parties = {
    'Bachmann, Michelle': 'Republican',
    'Romney, Mitt': 'Republican',
    'Obama, Barack': 'Democrat',
    "Roemer, Charles E. 'Buddy' III": 'Reform',
    'Pawlenty, Timothy': 'Republican',
    'Johnson, Gary Earl': 'Libertarian',
    'Paul, Ron': 'Republican',
    'Santorum, Rick': 'Republican',
    'Cain, Herman': 'Republican',
    'Gingrich, Newt': 'Republican',
    'McCotter, Thaddeus G': 'Republican',
    'Huntsman, Jon': 'Republican',
    'Perry, Rick': 'Republican',
}
# Load the contribution records from the text file.
data = pd.read_csv(filepath_or_buffer='data/usa_election.txt')
data.head(n=5)  # preview the first five rows
data.shape      # (row count, column count)
data.dtypes     # dtype of every column

# Step 1: add a 'party' column by looking each candidate name up in `parties`.
data['party'] = data.cand_nm.map(parties)
data.head(n=3)  # preview the first three rows, now including 'party'

# Step 2: list the distinct parties that appear in the data.
data.party.unique()

# Step 3: count how many rows belong to each party.
data.party.value_counts()

# Step 4: total contribution amount received by each party (group, then sum).
data.groupby(by='party').contb_receipt_amt.sum()

# Step 5: total contribution amount per receipt date and party, using a
# multi-key groupby.
data.groupby(by=['contb_receipt_dt', 'party']).contb_receipt_amt.sum()
# Step 6: convert receipt dates such as '20-JUN-11' into 'YYYY-MM-DD' text.
def transform_date(date, month_numbers=None):
    """Convert a 'DD-MON-YY' date string into zero-padded 'YYYY-MM-DD' text.

    Month and day are padded to two digits so the result is ISO-style and
    sorts chronologically when compared as plain strings.

    Args:
        date: Date text made of day, month abbreviation and two-digit year
            separated by '-', e.g. '20-JUN-11'.
        month_numbers: Optional mapping from month abbreviation to month
            number. When omitted, the module-level ``months`` table is used.

    Returns:
        The date as a string, e.g. '2011-06-20'. The century prefix is
        always '20'.

    Raises:
        ValueError: If ``date`` does not split into exactly three parts.
        KeyError: If the month abbreviation is missing from the table.
    """
    if month_numbers is None:
        month_numbers = months
    day, month_abbr, year = date.split('-')
    month_number = str(month_numbers[month_abbr]).zfill(2)
    return '20' + year + '-' + month_number + '-' + day.zfill(2)
# Step 6 (continued): rewrite every receipt date with transform_date.
data['contb_receipt_dt'] = data['contb_receipt_dt'].map(transform_date)
data.head()

# Step 7: check that the conversion worked.
# The column name was previously misspelled here, which raised KeyError.
data['contb_receipt_dt']

# Step 8: which candidate do disabled veterans support most?
# First select the rows whose occupation is exactly 'DISABLED VETERAN'.
is_disabled_veteran = data['contbr_occupation'] == 'DISABLED VETERAN'
old_bing = data.loc[is_disabled_veteran]
old_bing
# Total amount these contributors gave to each candidate. Grouping over rows
# is the groupby default, so the deprecated `axis` keyword is not passed.
r3 = old_bing.groupby('cand_nm')['contb_receipt_amt'].sum()
r3

# Step 9: find the largest donations and the occupation of whoever made them.
# Row(s) holding the single largest contribution in the whole data set.
largest_amount = data['contb_receipt_amt'].max()
data.query("contb_receipt_amt == @largest_amount")

# For each candidate, the row(s) matching that candidate's own largest
# contribution; ties are kept so no top donor is dropped.
per_candidate_max = data.groupby('cand_nm')['contb_receipt_amt'].transform('max')
top_donations = data.loc[
    data['contb_receipt_amt'] == per_candidate_max,
    ['cand_nm', 'contbr_occupation', 'contb_receipt_amt'],
]
top_donations

# 猜你喜欢

# 转载自 www.cnblogs.com/zhangchen-sx/p/11125961.html