[Python] Reshaping Data with pandas(1)
2 minute read
Pandas
.transpose()
- switch column & row
.pivot()
- long to wide
.pivot_table()
- long to wide
# import pkg & dataset
import numpy as np
import pandas as pd
df_fifa = pd.read_csv("players_20.csv")
.transpose()
df1 = df_fifa.set_index("short_name")[["height_cm", "weight_kg"]]
df1.head(3)
|
height_cm |
weight_kg |
short_name |
|
|
L. Messi |
170 |
72 |
Cristiano Ronaldo |
187 |
83 |
Neymar Jr |
175 |
68 |
short_name |
L. Messi |
Cristiano Ronaldo |
Neymar Jr |
J. Oblak |
E. Hazard |
K. De Bruyne |
M. ter Stegen |
V. van Dijk |
L. Modrić |
M. Salah |
... |
M. Gallagher |
Huang Jiahui |
M. Sagaf |
E. Tweed |
P. Martin |
Shao Shuai |
Xiao Mingjie |
Zhang Wei |
Wang Haijian |
Pan Ximing |
height_cm |
170 |
187 |
175 |
188 |
175 |
181 |
187 |
193 |
172 |
175 |
... |
178 |
183 |
177 |
180 |
188 |
186 |
177 |
186 |
185 |
182 |
weight_kg |
72 |
83 |
68 |
87 |
74 |
70 |
85 |
92 |
66 |
71 |
... |
70 |
74 |
70 |
72 |
84 |
79 |
66 |
75 |
74 |
78 |
2 rows × 18278 columns
.pivot()
- values가 전체인 경우 생략
- 중복값이 있는 경우 제거해야함
# 데이터 불러오기
fifa_long = pd.read_csv("fifa_long.csv")
fifa_long.head(3)
|
name |
movement |
overall |
attacking |
0 |
messi |
shooting |
92 |
90 |
1 |
ronaldo |
shooting |
91 |
89 |
2 |
messi |
passing |
95 |
94 |
# values가 1개인 경우
fifa_long.pivot(index="name", columns="movement", values="overall")
movement |
dribbling |
passing |
shooting |
name |
|
|
|
messi |
96 |
95 |
92 |
ronaldo |
88 |
82 |
91 |
# values가 복수개인 경우
fifa_long.pivot(index="name", columns="movement", values=["overall", "attacking"])
# values가 2개이므로 위와 같음
# fifa_long.pivot(index="name", columns="movement")
|
overall |
attacking |
movement |
dribbling |
passing |
shooting |
dribbling |
passing |
shooting |
name |
|
|
|
|
|
|
messi |
96 |
95 |
92 |
88 |
94 |
90 |
ronaldo |
88 |
82 |
91 |
83 |
83 |
89 |
# 중복 행 만들기
fifa_long = fifa_long.append({"name":"ronaldo","movement":"dribbling","overall":85,"attacking":84}, ignore_index=True)
fifa_long
|
name |
movement |
overall |
attacking |
0 |
messi |
shooting |
92 |
90 |
1 |
ronaldo |
shooting |
91 |
89 |
2 |
messi |
passing |
95 |
94 |
3 |
ronaldo |
passing |
82 |
83 |
4 |
messi |
dribbling |
96 |
88 |
5 |
ronaldo |
dribbling |
88 |
83 |
6 |
ronaldo |
dribbling |
85 |
84 |
7 |
ronaldo |
dribbling |
85 |
84 |
# 중복 행 제거
fifa_long.drop(7, axis=0)
|
name |
movement |
overall |
attacking |
0 |
messi |
shooting |
92 |
90 |
1 |
ronaldo |
shooting |
91 |
89 |
2 |
messi |
passing |
95 |
94 |
3 |
ronaldo |
passing |
82 |
83 |
4 |
messi |
dribbling |
96 |
88 |
5 |
ronaldo |
dribbling |
88 |
83 |
6 |
ronaldo |
dribbling |
85 |
84 |
.pivot_table()
- 다음 3가지의 조건 중 하나라도 만족하는 경우 사용(즉, pivot과의 차이)
- index/column pair이 한 개 이상인 경우
- multi-index로 pivot을 원하는 경우
- summary statistic을 산출하고 싶은 경우
- parameter
- index - 행 기준
- columns - 열 기준
- values - 값 기준
- aggfunc - 값에 대한 통계치, default는 mean
- margins - 총계
# dataset player included in fc barcelona or real madrid
df2 = df_fifa[["sofifa_id", "short_name", "age", "height_cm", "weight_kg", "nationality", "club"]]
df_rm_fbc = df2[df2.club.isin(["FC Barcelona", "Real Madrid"])]
# maximum height of player by nationality & club
df_rm_fbc.pivot_table(index="nationality",
columns="club",
values="height_cm",
aggfunc="max").head(3)
club |
FC Barcelona |
Real Madrid |
nationality |
|
|
Argentina |
170.0 |
NaN |
Belgium |
NaN |
199.0 |
Brazil |
190.0 |
186.0 |
# total count of players of RM and FBC by nationality & club
df_rm_fbc.pivot_table(index="nationality",
columns="club",
values="sofifa_id",
aggfunc="count",
margins=True)
club |
FC Barcelona |
Real Madrid |
All |
nationality |
|
|
|
Argentina |
1.0 |
NaN |
1 |
Belgium |
NaN |
2.0 |
2 |
Brazil |
3.0 |
6.0 |
9 |
Chile |
1.0 |
NaN |
1 |
Colombia |
NaN |
1.0 |
1 |
Costa Rica |
NaN |
1.0 |
1 |
Croatia |
1.0 |
1.0 |
2 |
Dominican Republic |
NaN |
1.0 |
1 |
France |
5.0 |
3.0 |
8 |
Germany |
1.0 |
1.0 |
2 |
Japan |
1.0 |
1.0 |
2 |
Netherlands |
2.0 |
NaN |
2 |
Portugal |
1.0 |
NaN |
1 |
Senegal |
1.0 |
NaN |
1 |
Serbia |
NaN |
1.0 |
1 |
Spain |
15.0 |
13.0 |
28 |
Uruguay |
1.0 |
1.0 |
2 |
Wales |
NaN |
1.0 |
1 |
All |
33.0 |
33.0 |
66 |