只使用适用于DatetimeIndex的方法

# Load the crime dataset from HDF5 and use REPORTED_DATE as the row index.
crime = pd.read_hdf('data/crime.h5', 'crime').set_index('REPORTED_DATE')
# Check the index type; the expected output is shown on the next line.
print(type(crime.index))
#<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
# Select incidents reported from 02:00 up to, but not including, 05:00.
# `include_end=False` was deprecated in pandas 1.4 and removed in 2.0;
# `inclusive='left'` (keep the start bound, drop the end bound) is the
# equivalent spelling.
crime.between_time('2:00', '5:00', inclusive='left').head()
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2014-06-29 02:01:00 traffic-accident-dui-duid traffic-accident -105.000149 39.745753 cbd 0 1
2014-06-29 02:00:00 disturbing-the-peace public-disorder -105.020719 39.706674 athmar-park 1 0
2014-06-29 02:18:00 curfew public-disorder -105.001552 39.769505 sunnyside 1 0
2014-06-29 04:17:00 aggravated-assault aggravated-assault -105.018557 39.679229 college-view-south-platte 1 0
2014-06-29 04:22:00 violation-of-restraining-order all-other-crimes -104.972447 39.739449 cheesman-park 1 0
# Use at_time to keep only the rows recorded at one specific time of day (05:47).
crime.at_time(time='5:47').head(n=5)
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2013-11-26 05:47:00 criminal-mischief-other public-disorder -104.991476 39.751536 cbd 1 0
2017-04-09 05:47:00 criminal-mischief-mtr-veh public-disorder -104.959394 39.678425 university 1 0
2017-02-19 05:47:00 criminal-mischief-other public-disorder -104.986767 39.741336 north-capitol-hill 1 0
2017-02-16 05:47:00 aggravated-assault aggravated-assault -104.934029 39.732320 hale 1 0
2017-02-12 05:47:00 police-interference all-other-crimes -104.976306 39.722644 speer 1 0
# `first` selects the rows that fall within an initial span of time.
# Limit the display to 6 rows, sort the time index, then pass a pd.offsets object.
# NOTE: DataFrame.first is deprecated in recent pandas releases (2.1+).
pd.options.display.max_rows = 6
crime_sort = crime.sort_index()
crime_sort.first(offset=pd.offsets.MonthBegin(6))
# An offset bounds the selection by the values of the datetime index rather
# than by row position.
# NOTE(review): the original note also says unsorted frames are still filtered
# and contrasts this with integer windows that count rows in order -- confirm
# what it refers to; `first` is called on the sorted frame here.
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-06-30 23:50:00 criminal-mischief-mtr-veh public-disorder -104.838271 39.788683 montbello 1 0
2012-06-30 23:54:00 traffic-accident-hit-and-run traffic-accident -105.014162 39.740439 lincoln-park 0 1
2012-07-01 00:01:00 robbery-street robbery -104.924292 39.767585 northeast-park-hill 1 0

27489 rows × 7 columns

# The last row of the previous result is from July because pandas measures the
# offset from the first value in the row index, i.e. 2012-01-02 00:06:00.
# Try MonthEnd instead.
crime_sort.first(offset=pd.offsets.MonthEnd(n=6))
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-06-29 23:41:00 robbery-street robbery -104.991912 39.756163 five-points 1 0
2012-06-29 23:57:00 assault-simple other-crimes-against-persons -104.987360 39.715162 speer 1 0
2012-06-30 00:04:00 traffic-accident traffic-accident -104.894697 39.628902 hampden-south 0 1

27332 rows × 7 columns

# The result above contains only a single row from June 30, again because of
# the time of day carried by the first index value.
# Every DateOffset object takes a `normalize` parameter; when it is True the
# time component is reset to midnight.
# This yields the result we actually wanted.
crime_sort.first(offset=pd.offsets.MonthBegin(n=6, normalize=True))
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-06-30 23:44:00 traffic-accident traffic-accident -104.987578 39.711158 baker 0 1
2012-06-30 23:50:00 criminal-mischief-mtr-veh public-disorder -104.838271 39.788683 montbello 1 0
2012-06-30 23:54:00 traffic-accident-hit-and-run traffic-accident -105.014162 39.740439 lincoln-park 0 1

27488 rows × 7 columns

# Select January through June 2012 by slicing the sorted index up to the
# partial date string '2012-06'.
crime_sort.loc[slice(None, '2012-06')]
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-06-30 23:44:00 traffic-accident traffic-accident -104.987578 39.711158 baker 0 1
2012-06-30 23:50:00 criminal-mischief-mtr-veh public-disorder -104.838271 39.788683 montbello 1 0
2012-06-30 23:54:00 traffic-accident-hit-and-run traffic-accident -105.014162 39.740439 lincoln-park 0 1

27488 rows × 7 columns

## The first 5 days
crime_sort.first(offset='5D')
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-01-06 23:30:00 assault-dv other-crimes-against-persons -104.958983 39.674135 university-park 1 0
2012-01-06 23:44:00 theft-of-motor-vehicle auto-theft -104.845356 39.794035 montbello 1 0
2012-01-06 23:55:00 threats-to-injure public-disorder -105.004788 39.708714 athmar-park 1 0

605 rows × 7 columns

## The first 5 business days
crime_sort.first(offset='5B')
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-01-08 23:52:00 theft-other larceny -104.968227 39.739752 cheesman-park 1 0
2012-01-09 00:04:00 traffic-accident-hit-and-run traffic-accident -104.973343 39.760757 five-points 0 1
2012-01-09 00:05:00 fraud-criminal-impersonation white-collar-crime -105.024676 39.712702 valverde 1 0

879 rows × 7 columns

## The first 7 weeks
crime_sort.first(offset='7W')
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-02-18 22:20:00 traffic-accident-dui-duid traffic-accident -104.919946 39.761917 north-park-hill 0 1
2012-02-18 22:44:00 criminal-mischief-mtr-veh public-disorder -105.044984 39.736776 west-colfax 1 0
2012-02-18 23:27:00 theft-items-from-vehicle theft-from-motor-vehicle -105.009018 39.708701 athmar-park 1 0

6708 rows × 7 columns

# The first three quarters (three quarter-start offsets), not "the start of
# the third quarter": the output runs through 2012-09-30.
crime_sort.first(offset='3QS')  # QS = quarter start
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2012-01-02 00:06:00 aggravated-assault aggravated-assault -104.816860 39.796717 montbello 1 0
2012-01-02 00:06:00 violation-of-restraining-order all-other-crimes -104.816860 39.796717 montbello 1 0
2012-01-02 00:16:00 traffic-accident-dui-duid traffic-accident -104.971851 39.736874 cheesman-park 0 1
... ... ... ... ... ... ... ...
2012-09-30 23:29:00 theft-of-motor-vehicle auto-theft -104.988838 39.686925 overland 1 0
2012-09-30 23:41:00 traffic-accident-hit-and-run traffic-accident -105.087598 39.638462 marston 0 1
2012-09-30 23:43:00 robbery-business robbery -104.772712 39.781966 gateway-green-valley-ranch 1 0

43045 rows × 7 columns

原理

# between_time also accepts datetime.time objects in place of time strings.
import datetime
# `include_end=False` was removed in pandas 2.0; `inclusive='left'` keeps the
# 02:00 bound and excludes the 05:00 bound, which is the same selection.
crime.between_time(datetime.time(2, 0), datetime.time(5, 0), inclusive='left')
  OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID GEO_LON GEO_LAT NEIGHBORHOOD_ID IS_CRIME IS_TRAFFIC
REPORTED_DATE              
2014-06-29 02:01:00 traffic-accident-dui-duid traffic-accident -105.000149 39.745753 cbd 0 1
2014-06-29 02:00:00 disturbing-the-peace public-disorder -105.020719 39.706674 athmar-park 1 0
2014-06-29 02:18:00 curfew public-disorder -105.001552 39.769505 sunnyside 1 0
... ... ... ... ... ... ... ...
2017-09-13 02:21:00 assault-simple other-crimes-against-persons -104.925733 39.654184 university-hills 1 0
2017-09-13 03:21:00 traffic-accident-dui-duid traffic-accident -105.010711 39.757385 highland 0 1
2017-09-13 02:15:00 traffic-accident-hit-and-run traffic-accident -105.043950 39.787436 regis 0 1

29078 rows × 7 columns

# Take the first timestamp of the sorted index,
# then add six months to it in two different ways.
first_date = crime_sort.index[0]
first_date
# Timestamp('2012-01-02 00:06:00')
first_date + pd.offsets.MonthBegin(n=6)
# Timestamp('2012-07-01 00:06:00')
first_date + pd.offsets.MonthEnd(n=6)
# Timestamp('2012-06-30 00:06:00')

更多

# Build a custom offset with the generic DateOffset class.
dt = pd.Timestamp('2012-1-16 13:40')
dt + pd.DateOffset(months=1)
# Timestamp('2012-02-16 13:40:00')

# A richer example that combines several date and time components.
do = pd.DateOffset(
    years=2,
    months=5,
    days=3,
    hours=8,
    seconds=10,
)
pd.Timestamp('2012-1-22 03:22') + do
# Timestamp('2014-06-25 11:22:10')

# Set the row display limit to 60.
pd.set_option('display.max_rows', 60)

猜你喜欢

转载自blog.csdn.net/weixin_48135624/article/details/114240439