数据常用筛选方法
- 在数据中,选择需要的行或者列
- 基础索引方式,就是直接引用
- loc[行索引名称或者条件,列索引名称或者标签]
- iloc[行索引位置,列索引位置]
import pandas as pd
import os
import numpy as np
# Show the current working directory before changing it.
os.getcwd()
'D:\\Jupyter\\notebook\\Python数据清洗实战\\数据清洗之数据表处理'
# Switch the working directory to the data folder so the relative CSV path below resolves.
os.chdir('D:\\Jupyter\\notebook\\Python数据清洗实战\\数据')
# Load the trade history; user_id is forced to str instead of being left to dtype inference.
df = pd.read_csv('baby_trade_history.csv', encoding='utf-8', dtype={'user_id':str})
# Display the DataFrame (the notebook output follows).
df
|
user_id |
auction_id |
cat_id |
cat1 |
property |
buy_mount |
day |
0 |
786295544 |
41098319944 |
50014866 |
50022520 |
21458:86755362;13023209:3593274;10984217:21985... |
2 |
20140919 |
1 |
532110457 |
17916191097 |
50011993 |
28 |
21458:11399317;1628862:3251296;21475:137325;16... |
1 |
20131011 |
2 |
249013725 |
21896936223 |
50012461 |
50014815 |
21458:30992;1628665:92012;1628665:3233938;1628... |
1 |
20131011 |
3 |
917056007 |
12515996043 |
50018831 |
50014815 |
21458:15841995;21956:3494076;27000458:59723383... |
2 |
20141023 |
4 |
444069173 |
20487688075 |
50013636 |
50008168 |
21458:30992;13658074:3323064;1628665:3233941;1... |
1 |
20141103 |
5 |
152298847 |
41840167463 |
121394024 |
50008168 |
21458:3408353;13023209:727117752;22009:2741771... |
1 |
20141103 |
6 |
513441334 |
19909384116 |
50010557 |
50008168 |
25935:21991;1628665:29784;22019:34731;22019:20... |
1 |
20121212 |
7 |
297411659 |
13540124907 |
50010542 |
50008168 |
21458:60020529;25935:31381;1633959:27247291;16... |
1 |
20121212 |
8 |
82830661 |
19948600790 |
50013874 |
28 |
21458:11580;21475:137325 |
1 |
20121101 |
9 |
475046636 |
10368360710 |
203527 |
28 |
22724:40168;22729:40278;21458:21817;2770200:24... |
1 |
20121101 |
10 |
734147966 |
15307958346 |
50018202 |
38 |
21458:3270827;7361532:28710594;7397093:7536994... |
2 |
20121101 |
11 |
68547330 |
21162876126 |
50012365 |
122650008 |
1628665:3233941;1628665:3233942;1628665:323393... |
1 |
20121123 |
12 |
697081418 |
15898050723 |
50013636 |
50008168 |
21458:19726868;1633959:179425852;13836282:1290... |
1 |
20121123 |
13 |
377550424 |
15771663914 |
50015841 |
28 |
1628665:3233941;1628665:3233942;3914866:11580;... |
1 |
20121123 |
14 |
88313935 |
22532727492 |
50013711 |
50008168 |
1628665:3233941;1628665:3233942;22019:3340598;... |
1 |
20131005 |
15 |
25918750 |
16078389250 |
50012359 |
122650008 |
21458:3405407;1633959:6186201;1628366:32799;81... |
1 |
20131005 |
16 |
350288528 |
35086271572 |
50010544 |
50008168 |
21458:61813;25935:21991;1628665:3233938;162866... |
1 |
20131129 |
17 |
348090113 |
17436967558 |
50009540 |
50014815 |
21458:21910;3110425:30696849;2191928:75373546;... |
1 |
20131129 |
18 |
1635282280 |
36153356431 |
50013207 |
50008168 |
1628665:29784;1628665:29799;2904342:31004;2201... |
1 |
20131129 |
19 |
530850018 |
22058239899 |
50024147 |
28 |
21458:205007542;43307470:5543413;2339128:62147... |
1 |
20140210 |
20 |
749507708 |
19171641742 |
50018860 |
28 |
21458:3602856;1628665:3233941;1628665:3233942;... |
1 |
20140210 |
21 |
201088567 |
38564176352 |
50013207 |
50008168 |
1628665:3233941;1628665:3233942;1628665:323393... |
1 |
20140502 |
22 |
469517728 |
8232924597 |
211122 |
38 |
21458:21782;36786:42781029;13023102:6999219;22... |
6 |
20140502 |
23 |
691367866 |
17712372914 |
121434042 |
50014815 |
21458:49341152;8021059:5525523;6851452:1398669... |
1 |
20140804 |
24 |
77193822 |
35537441586 |
50006520 |
50014815 |
22277:6262384;21458:30992;1628665:3233941;1628... |
2 |
20140804 |
25 |
605678021 |
15502618744 |
50010555 |
50008168 |
25935:31381;1628665:3233941;1628665:3233942;16... |
1 |
20130226 |
26 |
47702620 |
26481508332 |
121412034 |
50014815 |
21458:49341152;11057903:4036007;130475532:7537... |
1 |
20140918 |
27 |
763560371 |
40945285800 |
50012365 |
122650008 |
21458:30992;1628665:3233939;22007:30338;22007:... |
1 |
20150201 |
28 |
408028533 |
35838498718 |
50012442 |
50008168 |
21458:3596449;6811831:3446999;13023209:3446999... |
1 |
20141009 |
29 |
53566371 |
27177784760 |
121394024 |
50008168 |
21458:42090508;1628665:3233941;1628665:3233942... |
1 |
20141009 |
... |
... |
... |
... |
... |
... |
... |
... |
29941 |
413188001 |
16521677358 |
50012478 |
50014815 |
21458:28155;5434803:3636603;2815901:22583732;1... |
1 |
20130107 |
29942 |
474062095 |
21129724585 |
50013207 |
50008168 |
21458:21599;1628665:29798;1628665:3233938;1628... |
1 |
20130107 |
29943 |
797710454 |
18176728510 |
50013177 |
28 |
1628665:3233941;1628665:3233942;1628665:323393... |
1 |
20130107 |
29944 |
1716505453 |
37844041565 |
50010555 |
50008168 |
21458:30992;25935:31381;1628665:3233941;162866... |
1 |
20141231 |
29945 |
1966692323 |
42504930457 |
50012359 |
122650008 |
21458:3379652;1628665:3233940;1628665:3233938;... |
1 |
20141231 |
29946 |
641734831 |
22105131076 |
50014277 |
50014815 |
21458:21906;13227811:51479;13230966:75369014;3... |
2 |
20141016 |
29947 |
731030177 |
41666438142 |
121394024 |
50008168 |
21458:3443560;1628665:3233942;1628665:3233938;... |
1 |
20141016 |
29948 |
68515755 |
13953276547 |
50012788 |
28 |
21458:12376977;2112993:32075;1628665:92012;162... |
1 |
20130729 |
29949 |
180436843 |
23375100402 |
50012451 |
50008168 |
21458:33514;1633959:13343071;33030:29800;33162... |
1 |
20130729 |
29950 |
801784345 |
17629938386 |
50023670 |
28 |
21458:3550980;29154281:231350353;11684888:1045... |
1 |
20130729 |
29951 |
124458824 |
19739113764 |
50013636 |
50008168 |
21458:30992;13658074:9306734;1628665:3233941;1... |
1 |
20140322 |
29952 |
602141957 |
37251457564 |
50012360 |
122650008 |
21458:21599;1628665:29798;1628665:82340;162866... |
1 |
20140322 |
29953 |
595095853 |
41160643364 |
121364022 |
50008168 |
21458:80090256;1628665:29784;1628665:29796;162... |
1 |
20150111 |
29954 |
1905258237 |
42298652641 |
121452056 |
50008168 |
21458:30992;1628665:3233942;1628665:31614;1628... |
1 |
20150111 |
29955 |
1957645413 |
36768778465 |
121448033 |
38 |
6940834:29865;1628149:137593;21475:114226;2275... |
1 |
20140815 |
29956 |
1854778218 |
37200665444 |
50012361 |
122650008 |
21458:3645338;13023209:544768204;122217803:309... |
1 |
20140815 |
29957 |
268356658 |
36932456353 |
50010236 |
50014815 |
21458:10513072;12474507:706291650;3091143:9208... |
1 |
20141027 |
29958 |
196272909 |
10066997901 |
50009540 |
50014815 |
21458:21906;13229910:32056435;2191928:73664723... |
1 |
20141104 |
29959 |
23473499 |
38019470815 |
50010236 |
50014815 |
1628665:61550;1628665:3233940;1628665:3233936;... |
1 |
20141104 |
29960 |
816394377 |
19835118833 |
50003700 |
28 |
24448:73774385;6725953:48332;22044:30715;80047... |
1 |
20130912 |
29961 |
164859586 |
15842319049 |
50012479 |
28 |
NaN |
1 |
20130912 |
29962 |
119149466 |
26396292642 |
50008875 |
28 |
21458:30992;11684888:104528258;21475:11488282;... |
1 |
20130912 |
29963 |
704655047 |
10506866020 |
50007011 |
50008168 |
1628665:3233941;1628665:3233942;1628665:323393... |
1 |
20121206 |
29964 |
45662429 |
20745380642 |
50010555 |
50008168 |
25935:31381;1628665:3233941;1628665:3233942;16... |
1 |
20121206 |
29965 |
35711492 |
16563353438 |
50010544 |
50008168 |
21458:11580;25935:21991;1628665:92012;1628665:... |
1 |
20121206 |
29966 |
57747284 |
35169635909 |
50010549 |
50008168 |
21458:125202070;22019:3228688;22019:3248884;22... |
1 |
20140109 |
29967 |
287541325 |
19778523000 |
50007011 |
50008168 |
21458:112788583;1633959:3523439;3130834:209537... |
2 |
20140109 |
29968 |
82915321 |
12766532512 |
50011993 |
28 |
21475:137325;1628665:3233937;1628665:29798;162... |
1 |
20131008 |
29969 |
78259523 |
18309305134 |
50013711 |
50008168 |
21458:30992;1628665:29778;1628665:29793;163395... |
1 |
20131008 |
29970 |
758305789 |
20177445814 |
50018860 |
28 |
21458:3602856;1628665:29784;1628665:3233941;73... |
1 |
20131008 |
29971 rows × 7 columns
# Summarize the frame: per-column non-null counts and dtypes, plus memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29971 entries, 0 to 29970
Data columns (total 7 columns):
user_id 29971 non-null object
auction_id 29971 non-null int64
cat_id 29971 non-null int64
cat1 29971 non-null int64
property 29827 non-null object
buy_mount 29971 non-null int64
day 29971 non-null int64
dtypes: int64(5), object(2)
memory usage: 1.6+ MB
# Preview the first 10 rows.
df.head(10)
|
user_id |
auction_id |
cat_id |
cat1 |
property |
buy_mount |
day |
0 |
786295544 |
41098319944 |
50014866 |
50022520 |
21458:86755362;13023209:3593274;10984217:21985... |
2 |
20140919 |
1 |
532110457 |
17916191097 |
50011993 |
28 |
21458:11399317;1628862:3251296;21475:137325;16... |
1 |
20131011 |
2 |
249013725 |
21896936223 |
50012461 |
50014815 |
21458:30992;1628665:92012;1628665:3233938;1628... |
1 |
20131011 |
3 |
917056007 |
12515996043 |
50018831 |
50014815 |
21458:15841995;21956:3494076;27000458:59723383... |
2 |
20141023 |
4 |
444069173 |
20487688075 |
50013636 |
50008168 |
21458:30992;13658074:3323064;1628665:3233941;1... |
1 |
20141103 |
5 |
152298847 |
41840167463 |
121394024 |
50008168 |
21458:3408353;13023209:727117752;22009:2741771... |
1 |
20141103 |
6 |
513441334 |
19909384116 |
50010557 |
50008168 |
25935:21991;1628665:29784;22019:34731;22019:20... |
1 |
20121212 |
7 |
297411659 |
13540124907 |
50010542 |
50008168 |
21458:60020529;25935:31381;1633959:27247291;16... |
1 |
20121212 |
8 |
82830661 |
19948600790 |
50013874 |
28 |
21458:11580;21475:137325 |
1 |
20121101 |
9 |
475046636 |
10368360710 |
203527 |
28 |
22724:40168;22729:40278;21458:21817;2770200:24... |
1 |
20121101 |
# List the column labels; these are the names usable in [] and loc.
df.columns
Index(['user_id', 'auction_id', 'cat_id', 'cat1', 'property', 'buy_mount',
'day'],
dtype='object')
# Basic indexing: a single column label returns that column as a Series (first 5 values shown).
df['user_id'].head(5)
0 786295544
1 532110457
2 249013725
3 917056007
4 444069173
Name: user_id, dtype: object
# Basic indexing: a list of column labels selects several columns at once.
df[['user_id', 'cat1']].head(5)
|
user_id |
cat1 |
0 |
786295544 |
50022520 |
1 |
532110457 |
28 |
2 |
249013725 |
50014815 |
3 |
917056007 |
50014815 |
4 |
444069173 |
50008168 |
# Select two columns, then slice rows by position; the end of 1:5 is excluded, so rows 1-4 are returned.
df[['user_id', 'cat1']][1:5]
|
user_id |
cat1 |
1 |
532110457 |
28 |
2 |
249013725 |
50014815 |
3 |
917056007 |
50014815 |
4 |
444069173 |
50008168 |
# loc slices by index label and includes both endpoints: rows labelled 3 and 4 are returned.
df.loc[3:4]
|
user_id |
auction_id |
cat_id |
cat1 |
property |
buy_mount |
day |
3 |
917056007 |
12515996043 |
50018831 |
50014815 |
21458:15841995;21956:3494076;27000458:59723383... |
2 |
20141023 |
4 |
444069173 |
20487688075 |
50013636 |
50008168 |
21458:30992;13658074:3323064;1628665:3233941;1... |
1 |
20141103 |
# loc with ':' for rows keeps every row; the list picks columns by label.
df.loc[:,['user_id','buy_mount']].head(5)
|
user_id |
buy_mount |
0 |
786295544 |
2 |
1 |
532110457 |
1 |
2 |
249013725 |
1 |
3 |
917056007 |
2 |
4 |
444069173 |
1 |
# loc with a row label slice (1 through 3, inclusive) and a list of column labels.
df.loc[1:3, ['user_id','buy_mount']]
|
user_id |
buy_mount |
1 |
532110457 |
1 |
2 |
249013725 |
1 |
3 |
917056007 |
2 |
# loc with a boolean condition as the row selector; user_id was read as str, so it is compared to a string.
df.loc[df.user_id=='249013725', ['user_id', 'buy_mount']]
|
user_id |
buy_mount |
2 |
249013725 |
1 |
# Combine two conditions with | (element-wise OR); each comparison is parenthesized because | binds tighter than == and >=.
df.loc[(df.user_id=='249013725') | (df.buy_mount>=1000), ['user_id', 'buy_mount']]
|
user_id |
buy_mount |
2 |
249013725 |
1 |
1164 |
1945590674 |
1500 |
5536 |
2288344467 |
10000 |
6627 |
117730165 |
2800 |
10402 |
32141414 |
1000 |
25675 |
173701616 |
2748 |
# iloc slices by integer position and excludes the end: positions 1 and 2 are returned.
df.iloc[1:3]
|
user_id |
auction_id |
cat_id |
cat1 |
property |
buy_mount |
day |
1 |
532110457 |
17916191097 |
50011993 |
28 |
21458:11399317;1628862:3251296;21475:137325;16... |
1 |
20131011 |
2 |
249013725 |
21896936223 |
50012461 |
50014815 |
21458:30992;1628665:92012;1628665:3233938;1628... |
1 |
20131011 |
# iloc on both axes: row positions 1-2 and column positions 1-3 (ends excluded).
df.iloc[1:3, 1:4]
|
auction_id |
cat_id |
cat1 |
1 |
17916191097 |
50011993 |
28 |
2 |
21896936223 |
50012461 |
50014815 |
# iloc with ':' for rows and a list of column positions (0 and 2 -> user_id, cat_id).
df.iloc[:, [0, 2]].head(5)
|
user_id |
cat_id |
0 |
786295544 |
50014866 |
1 |
532110457 |
50011993 |
2 |
249013725 |
50012461 |
3 |
917056007 |
50018831 |
4 |
444069173 |
50013636 |
# iloc with lists on both axes picks non-contiguous positions: rows 1 and 10, columns 0 and 2.
df.iloc[[1,10], [0,2]]
|
user_id |
cat_id |
1 |
532110457 |
50011993 |
10 |
734147966 |
50018202 |
loc按标签选择(切片包含结束标签,如 df.loc[3:4] 返回第3、4行);iloc按整数位置选择(切片不包含结束位置,如 df.iloc[1:3] 返回第1、2行)