# Load the crime dataset from HDF5, use REPORTED_DATE as the row index,
# and check the type of the resulting index.
crime = pd.read_hdf('data/crime.h5', 'crime').set_index('REPORTED_DATE')
print(type(crime.index))
#<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
# Use between_time to select incidents reported from 2:00 up to, but not
# including, 5:00. `include_end=False` was deprecated in pandas 1.4 and removed
# in 2.0; `inclusive='left'` is the equivalent spelling (start kept, end dropped).
crime.between_time('2:00', '5:00', inclusive='left').head()
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2014-06-29 02:01:00 | traffic-accident-dui-duid | traffic-accident | -105.000149 | 39.745753 | cbd | 0 | 1 |
2014-06-29 02:00:00 | disturbing-the-peace | public-disorder | -105.020719 | 39.706674 | athmar-park | 1 | 0 |
2014-06-29 02:18:00 | curfew | public-disorder | -105.001552 | 39.769505 | sunnyside | 1 | 0 |
2014-06-29 04:17:00 | aggravated-assault | aggravated-assault | -105.018557 | 39.679229 | college-view-south-platte | 1 | 0 |
2014-06-29 04:22:00 | violation-of-restraining-order | all-other-crimes | -104.972447 | 39.739449 | cheesman-park | 1 | 0 |
# Use at_time to select the rows reported at one specific time of day.
crime.at_time('5:47').head()
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2013-11-26 05:47:00 | criminal-mischief-other | public-disorder | -104.991476 | 39.751536 | cbd | 1 | 0 |
2017-04-09 05:47:00 | criminal-mischief-mtr-veh | public-disorder | -104.959394 | 39.678425 | university | 1 | 0 |
2017-02-19 05:47:00 | criminal-mischief-other | public-disorder | -104.986767 | 39.741336 | north-capitol-hill | 1 | 0 |
2017-02-16 05:47:00 | aggravated-assault | aggravated-assault | -104.934029 | 39.732320 | hale | 1 | 0 |
2017-02-12 05:47:00 | police-interference | all-other-crimes | -104.976306 | 39.722644 | speer | 1 | 0 |
# The first method selects the rows that fall inside an initial span of time.
# Sort the time index first, then describe the span with the pd.offsets module.
# NOTE(review): DataFrame.first is reported as deprecated in recent pandas
# releases (2.1+) -- confirm against the pandas version this notebook targets.
crime_sort = crime.sort_index()
pd.options.display.max_rows = 6
crime_sort.first(pd.offsets.MonthBegin(6))
# An offset bounds the selection by time on a datetime-typed index. Per the
# original note, an unsorted DataFrame is still filtered by it, whereas an
# integer window simply counts rows in order.
# NOTE(review): the behaviour on an unsorted index is not shown here -- verify.
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-06-30 23:50:00 | criminal-mischief-mtr-veh | public-disorder | -104.838271 | 39.788683 | montbello | 1 | 0 |
2012-06-30 23:54:00 | traffic-accident-hit-and-run | traffic-accident | -105.014162 | 39.740439 | lincoln-park | 0 | 1 |
2012-07-01 00:01:00 | robbery-street | robbery | -104.924292 | 39.767585 | northeast-park-hill | 1 | 0 |
27489 rows × 7 columns
# The last row of the previous result is from July. That is because pandas
# starts from the first value in the row index, 2012-01-02 00:06:00.
# Next, try MonthEnd instead.
crime_sort.first(pd.offsets.MonthEnd(6))
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-06-29 23:41:00 | robbery-street | robbery | -104.991912 | 39.756163 | five-points | 1 | 0 |
2012-06-29 23:57:00 | assault-simple | other-crimes-against-persons | -104.987360 | 39.715162 | speer | 1 | 0 |
2012-06-30 00:04:00 | traffic-accident | traffic-accident | -104.894697 | 39.628902 | hampden-south | 0 | 1 |
27332 rows × 7 columns
# The result above contains only one row from June 30, again because of the
# first timestamp value.
# Every DateOffset object has a normalize parameter; when it is True, the time
# component is reset to zero (midnight).
# This produces the result we actually want.
crime_sort.first(pd.offsets.MonthBegin(6, normalize=True))
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-06-30 23:44:00 | traffic-accident | traffic-accident | -104.987578 | 39.711158 | baker | 0 | 1 |
2012-06-30 23:50:00 | criminal-mischief-mtr-veh | public-disorder | -104.838271 | 39.788683 | montbello | 1 | 0 |
2012-06-30 23:54:00 | traffic-accident-hit-and-run | traffic-accident | -105.014162 | 39.740439 | lincoln-park | 0 | 1 |
27488 rows × 7 columns
# Select the data from January through June 2012 by slicing with a
# partial date string.
crime_sort.loc[:'2012-06']
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-06-30 23:44:00 | traffic-accident | traffic-accident | -104.987578 | 39.711158 | baker | 0 | 1 |
2012-06-30 23:50:00 | criminal-mischief-mtr-veh | public-disorder | -104.838271 | 39.788683 | montbello | 1 | 0 |
2012-06-30 23:54:00 | traffic-accident-hit-and-run | traffic-accident | -105.014162 | 39.740439 | lincoln-park | 0 | 1 |
27488 rows × 7 columns
## 5 days
crime_sort.first('5D')
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-01-06 23:30:00 | assault-dv | other-crimes-against-persons | -104.958983 | 39.674135 | university-park | 1 | 0 |
2012-01-06 23:44:00 | theft-of-motor-vehicle | auto-theft | -104.845356 | 39.794035 | montbello | 1 | 0 |
2012-01-06 23:55:00 | threats-to-injure | public-disorder | -105.004788 | 39.708714 | athmar-park | 1 | 0 |
605 rows × 7 columns
## 5 business days
crime_sort.first('5B')
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-01-08 23:52:00 | theft-other | larceny | -104.968227 | 39.739752 | cheesman-park | 1 | 0 |
2012-01-09 00:04:00 | traffic-accident-hit-and-run | traffic-accident | -104.973343 | 39.760757 | five-points | 0 | 1 |
2012-01-09 00:05:00 | fraud-criminal-impersonation | white-collar-crime | -105.024676 | 39.712702 | valverde | 1 | 0 |
879 rows × 7 columns
## 7 weeks
crime_sort.first('7W')
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-02-18 22:20:00 | traffic-accident-dui-duid | traffic-accident | -104.919946 | 39.761917 | north-park-hill | 0 | 1 |
2012-02-18 22:44:00 | criminal-mischief-mtr-veh | public-disorder | -105.044984 | 39.736776 | west-colfax | 1 | 0 |
2012-02-18 23:27:00 | theft-items-from-vehicle | theft-from-motor-vehicle | -105.009018 | 39.708701 | athmar-park | 1 | 0 |
6708 rows × 7 columns
# Three quarter-start offsets from the first timestamp; the output below
# runs through 2012-09-30 (not "the start of the third quarter").
crime_sort.first('3QS') # QS: quarter-start frequency
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2012-01-02 00:06:00 | aggravated-assault | aggravated-assault | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:06:00 | violation-of-restraining-order | all-other-crimes | -104.816860 | 39.796717 | montbello | 1 | 0 |
2012-01-02 00:16:00 | traffic-accident-dui-duid | traffic-accident | -104.971851 | 39.736874 | cheesman-park | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
2012-09-30 23:29:00 | theft-of-motor-vehicle | auto-theft | -104.988838 | 39.686925 | overland | 1 | 0 |
2012-09-30 23:41:00 | traffic-accident-hit-and-run | traffic-accident | -105.087598 | 39.638462 | marston | 0 | 1 |
2012-09-30 23:43:00 | robbery-business | robbery | -104.772712 | 39.781966 | gateway-green-valley-ranch | 1 | 0 |
43045 rows × 7 columns
## 原理 (How it works)
# The bounds may also be given as time objects from the datetime module.
import datetime
# `include_end=False` was deprecated in pandas 1.4 and removed in 2.0;
# `inclusive='left'` keeps 2:00 and excludes 5:00, matching the old behaviour.
crime.between_time(datetime.time(2, 0), datetime.time(5, 0), inclusive='left')
OFFENSE_TYPE_ID | OFFENSE_CATEGORY_ID | GEO_LON | GEO_LAT | NEIGHBORHOOD_ID | IS_CRIME | IS_TRAFFIC | |
---|---|---|---|---|---|---|---|
REPORTED_DATE | |||||||
2014-06-29 02:01:00 | traffic-accident-dui-duid | traffic-accident | -105.000149 | 39.745753 | cbd | 0 | 1 |
2014-06-29 02:00:00 | disturbing-the-peace | public-disorder | -105.020719 | 39.706674 | athmar-park | 1 | 0 |
2014-06-29 02:18:00 | curfew | public-disorder | -105.001552 | 39.769505 | sunnyside | 1 | 0 |
... | ... | ... | ... | ... | ... | ... | ... |
2017-09-13 02:21:00 | assault-simple | other-crimes-against-persons | -104.925733 | 39.654184 | university-hills | 1 | 0 |
2017-09-13 03:21:00 | traffic-accident-dui-duid | traffic-accident | -105.010711 | 39.757385 | highland | 0 | 1 |
2017-09-13 02:15:00 | traffic-accident-hit-and-run | traffic-accident | -105.043950 | 39.787436 | regis | 0 | 1 |
29078 rows × 7 columns
# Take the first timestamp of the sorted index,
# then add six months to it in two different ways.
first_date = crime_sort.index[0]
first_date
#Timestamp('2012-01-02 00:06:00')
first_date + pd.offsets.MonthBegin(6)
#Timestamp('2012-07-01 00:06:00')
first_date + pd.offsets.MonthEnd(6)
#Timestamp('2012-06-30 00:06:00')
## 更多 (There's more)
# Build a custom DateOffset object and add it to a timestamp.
dt = pd.Timestamp('2012-1-16 13:40')
dt + pd.DateOffset(months=1)
#Timestamp('2012-02-16 13:40:00')
# A richer example that combines several date and time components.
do = pd.DateOffset(
    years=2,
    months=5,
    days=3,
    hours=8,
    seconds=10,
)
pd.Timestamp('2012-1-22 03:22') + do
#Timestamp('2014-06-25 11:22:10')
# Set the maximum number of displayed rows to 60.
pd.options.display.max_rows = 60