import pandas as pd

data = pd.read_html('https://ko.wikipedia.org/wiki/%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD%EC%9D%98_%EC%9D%B8%EA%B5%AC')

# Use the second table parsed from the page; its rendered output shows the
# '사망자수(명)' (number of deaths) column selected below.
인구수 = data[1]
# Double brackets keep the selection as a one-column DataFrame.
사망자수 = 인구수[['사망자수(명)']]
사망자수

# DataFrame.sum() returns a Series labelled by column name, so read the single
# total by position with .iloc; a bare [0] on a label-indexed Series depends on
# the deprecated positional fallback of Series.__getitem__.
사망자수.sum().iloc[0]

28518711

# Thousands-separated total; .iloc[0] avoids the deprecated positional [0] lookup on the summed Series.
format(사망자수.sum().iloc[0], ',')

'28,518,711'

pandas 공식 홈페이지 튜토리얼

What kind of data does pandas handle?

import pandas as pd

출처 : 공식 홈페이지

# A DataFrame can be built from a plain Python dict of column -> values.
# Real-world data usually arrives as CSV, so building from a dict by hand is
# uncommon in practice.
df = pd.DataFrame(
    dict(
        Name=[
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth",
        ],
        Age=[22, 35, 58],
        Sex=["male", "male", "female"],
    )
)

df

시리즈는 데이터프레임에서 하나의 컬럼입니다.

df["Age"]

0    22
1    35
2    58
Name: Age, dtype: int64

type(df["Age"])

pandas.core.series.Series

df[["Age"]]

type(df[['Age']])

pandas.core.frame.DataFrame

Do something with a DataFrame or Series

df["Age"].max()

58

df["Age"].min()

22

df["Age"].mean()

38.333333333333336

df["Age"].var()

332.3333333333333

df["Age"].std()

18.230011885167087

df.dtypes

Name    object
Age      int64
Sex     object
dtype: object

df.describe()

How do I read and write tabular data?

titanic = pd.read_csv("train.csv")

titanic

titanic.head()

titanic.tail()

titanic.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

titanic.to_excel("titanic.xlsx", sheet_name="passengers", index=False)

titanic_read_excel = pd.read_excel("titanic.xlsx", sheet_name="passengers")
titanic_read_excel

titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB

How do I select a subset of a DataFrame?

How do I select specific columns from a DataFrame?

titanic["Age"].shape

(891,)

titanic["Sex"].shape

(891,)

titanic[["Age", "Sex"]] # a single pair of brackets does not work here: selecting several columns takes a list and yields a DataFrame

type(titanic[["Age", "Sex"]])

pandas.core.frame.DataFrame

titanic[["Age", "Sex"]].shape

(891, 2)

How do I filter specific rows from a DataFrame?

above_35 = titanic[titanic["Age"] > 35]
above_35.head(10)

titanic["Age"] > 35
(titanic["Age"] > 35).sum()

217

above_35.shape

(217, 12)

# | means "or" (either condition holds); & means "and" (both conditions hold).
# Each comparison is wrapped in parentheses before being combined with |.
class_23 = titanic[(titanic["Pclass"] == 2) | (titanic["Pclass"] == 3)]

titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB

age_no_na = titanic[titanic["Age"].notna()]
age_no_na.shape

(714, 12)

How do I select specific rows and columns from a DataFrame?

adult_names = titanic.loc[titanic["Age"] > 35, "Name"]
adult_names

1      Cumings, Mrs. John Bradley (Florence Briggs Th...
6                                McCarthy, Mr. Timothy J
11                              Bonnell, Miss. Elizabeth
13                           Andersson, Mr. Anders Johan
15                      Hewlett, Mrs. (Mary D Kingcome) 
                             ...                        
865                             Bystrom, Mrs. (Karolina)
871     Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
873                          Vander Cruyssen, Mr. Victor
879        Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
885                 Rice, Mrs. William (Margaret Norton)
Name: Name, Length: 217, dtype: object

titanic.iloc[9:25, 2:5]

크롤링 데이터로 웹페이지 만들기

import pandas as pd

data = pd.read_html('https://ridibooks.com/category/bestsellers/2200')
data

import requests
from bs4 import BeautifulSoup

# Download the Ridibooks bestseller page and print each book title with its rank.
url = 'https://ridibooks.com/category/bestsellers/2200'  # edit: page to scrape
# requests applies no timeout by default, so set one to avoid hanging forever.
response = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx reply instead of silently parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'
html = response.text

soup = BeautifulSoup(html, 'html.parser')

bookservices = soup.select('.title_text')  # edit: CSS selector matching the titles
# Ranks start at 1, following the order of the matched elements.
for no, book in enumerate(bookservices, 1):
    print(no, book.text.strip())

1 시드 마이어
2 한 권으로 읽는 컴퓨터 구조와 프로그래밍
3 개발자에서 아키텍트로
4 컨테이너 인프라 환경 구축을 위한 쿠버네티스/도커
5 비전공자를 위한 이해할 수 있는 IT 지식
6 오브젝트
7 Node.js 디자인 패턴 바이블
8 눈치껏 못 배웁니다, 일센스
9 개정판 | 시작하세요! 도커/쿠버네티스
10 다재다능 코틀린 프로그래밍
11 파이썬으로 살펴보는 아키텍처 패턴
12 혼자 공부하는 머신러닝+딥러닝
13 비전공자도 배워서 바로 쓰는 비즈니스 데이터 분석 입문
14 Let's Get IT 자바스크립트 프로그래밍
15 개정판 | 리액트를 다루는 기술
16 메타버스, 이미 시작된 미래
17 IT 좀 아는 사람
18 이것이 취업을 위한 코딩 테스트다 with 파이썬
19 모던 자바스크립트 Deep Dive
20 NGINX 쿡북

import requests
from bs4 import BeautifulSoup

# Download the Ridibooks bestseller page and print rank, title and cover URL.
url = 'https://ridibooks.com/category/bestsellers/2200'  # edit: page to scrape
# requests applies no timeout by default, so set one to avoid hanging forever.
response = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx reply instead of silently parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'
html = response.text

soup = BeautifulSoup(html, 'html.parser')

bookservices = soup.select('.thumbnail')  # edit: CSS selector matching the cover images
for no, book in enumerate(bookservices, 1):
    # The title is read from the image's alt text; 'https:' is prepended to the
    # data-src value to form the full cover URL.
    print(no, book['alt'], 'https:' + book['data-src'])

1 시드 마이어 https://img.ridicdn.net/cover/194000109/large#1
2 한 권으로 읽는 컴퓨터 구조와 프로그래밍 https://img.ridicdn.net/cover/3649000021/large#1
3 개발자에서 아키텍트로 https://img.ridicdn.net/cover/443000917/large#1
4 컨테이너 인프라 환경 구축을 위한 쿠버네티스/도커 https://img.ridicdn.net/cover/754031863/large#1
5 비전공자를 위한 이해할 수 있는 IT 지식 https://img.ridicdn.net/cover/4489000001/large#1
6 오브젝트 https://img.ridicdn.net/cover/1160000027/large#1
7 Node.js 디자인 패턴 바이블 https://img.ridicdn.net/cover/194000106/large#1
8 눈치껏 못 배웁니다, 일센스 https://img.ridicdn.net/cover/222002588/large#1
9 개정판 | 시작하세요! 도커/쿠버네티스 https://img.ridicdn.net/cover/1160000029/large#1
10 다재다능 코틀린 프로그래밍 https://img.ridicdn.net/cover/194000105/large#1
11 파이썬으로 살펴보는 아키텍처 패턴 https://img.ridicdn.net/cover/443000912/large#1
12 혼자 공부하는 머신러닝+딥러닝 https://img.ridicdn.net/cover/443000859/large#1
13 비전공자도 배워서 바로 쓰는 비즈니스 데이터 분석 입문 https://img.ridicdn.net/cover/1370000008/large#1
14 Let's Get IT 자바스크립트 프로그래밍 https://img.ridicdn.net/cover/754031846/large#1
15 개정판 | 리액트를 다루는 기술 https://img.ridicdn.net/cover/754026976/large#1
16 메타버스, 이미 시작된 미래 https://img.ridicdn.net/cover/2777000044/large#1
17 IT 좀 아는 사람 https://img.ridicdn.net/cover/1046000113/large#1
18 이것이 취업을 위한 코딩 테스트다 with 파이썬 https://img.ridicdn.net/cover/443000825/large#1
19 모던 자바스크립트 Deep Dive https://img.ridicdn.net/cover/1160000024/large#1
20 NGINX 쿡북 https://img.ridicdn.net/cover/443000914/large#1

import requests
from bs4 import BeautifulSoup

# Fetch a Naver search result page and print the text of every '.name' element.
# The percent-encoded query parameter decodes to '박스오피스' (box office).
url = 'https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=1&ie=utf8&query=%EB%B0%95%EC%8A%A4%EC%98%A4%ED%94%BC%EC%8A%A4'  # edit: page to scrape
# requests applies no timeout by default, so set one to avoid hanging forever.
response = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx reply instead of silently parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'
html = response.text

soup = BeautifulSoup(html, 'html.parser')

bookservices = soup.select('.name')  # edit: CSS selector matching the entries
# Ranks start at 1, following the order of the matched elements.
for no, book in enumerate(bookservices, 1):
    print(no, book.text.strip())

1 발신제한
2 크루엘라
3 콰이어트 플레이스 2
4 미드나이트
5 킬러의 보디가드 2
6 루카
7 컨저링 3: 악마가 시켰다
8 인 더 하이츠
9 괴기맨숀
10 꽃다발 같은 사랑을 했다
11 체르노빌 1986
12 빛나는 순간
13 랑종
14 블라이스 스피릿
15 분노의 질주: 더 얼티메이트
16 극장판 귀멸의 칼날: 무한열...
17 다크 앤드 위키드
18 매직아치
19 시카다 3301
20 아이윌 송
21 우리는 매일매일
22 메이드 인 루프탑
23 여고괴담 여섯번째 이야기 :...
24 학교 가는 길
25 이보다 더 좋을 순 없다
26 크레센도
27 파리의 연인
28 샤먼 로드
29 트립 투 그리스
30 사랑하고 사랑받고 차고 차이고
31 극장판 귀멸의 칼날: 무한열...
32 루카
33 학교 가는 길
34 크루엘라
35 분노의 질주: 더 얼티메이트
36 콰이어트 플레이스 2
37 미드나이트
38 사랑하고 사랑받고 차고 차이고
39 체르노빌 1986
40 컨저링 3: 악마가 시켰다
41 발신제한
42 킬러의 보디가드 2
43 인 더 하이츠
44 여고괴담 여섯번째 이야기 :...
45 그냥 길가는 나그네
46 네잎클로버 27 15
47 삼공

import requests
from bs4 import BeautifulSoup

# Scrape the Ridibooks bestseller page and collect rank, title and cover URL
# into a DataFrame.
url = 'https://ridibooks.com/category/bestsellers/2200'  # edit: page to scrape
# requests applies no timeout by default, so set one to avoid hanging forever.
response = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx reply instead of building a table from an error page.
response.raise_for_status()
response.encoding = 'utf-8'
html = response.text

soup = BeautifulSoup(html, 'html.parser')

# Parallel lists: one entry per book, in page order.
책순위 = []    # rank, starting at 1
책이름 = []    # title, taken from the thumbnail's alt text
책이미지 = []  # cover image URL

bookservices = soup.select('.thumbnail')  # edit: CSS selector matching the cover images
for no, book in enumerate(bookservices, 1):
    책순위.append(no)
    책이름.append(book['alt'])
    # 'https:' is prepended to the data-src value to form the full cover URL.
    책이미지.append('https:' + book['data-src'])

df = pd.DataFrame({
    '책순위' : 책순위,
    '책이름' : 책이름,
    '책이미지' : 책이미지
})
df

df.to_html('index.html')

def 이미지변환(path):
    """Return an HTML ``<img>`` tag, 60 pixels wide, whose source is *path*.

    Args:
        path: Image URL or file path to place in the ``src`` attribute.

    Returns:
        The ``<img>`` markup as a string.
    """
    # Local import keeps this helper self-contained.
    from html import escape

    # Escape the value so quotes, ampersands or angle brackets in a scraped
    # URL cannot terminate the attribute and inject markup.
    return f'<img src="{escape(str(path), quote=True)}" width="60" >'

df.to_html('index.html', escape=False, formatters=dict(책이미지=이미지변환))

How to create plots in pandas?

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('train.csv')
df[['SibSp', 'Parch']].plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7fab280e5050>

df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

df.plot.scatter(x="Age", y="Fare", alpha=0.5)

<matplotlib.axes._subplots.AxesSubplot at 0x7fab28749c90>

df[['Age']].plot.box()

<matplotlib.axes._subplots.AxesSubplot at 0x7fab283c1050>

How to create new columns derived from existing columns?

df['Family'] = 1 + df['SibSp'] + df['Parch']
df

How to calculate summary statistics?

Aggregating statistics

import pandas as pd

df = pd.read_csv('train.csv')
df.head()

df["Age"].mean() # mean

29.69911764705882

df[["Age", "Fare"]].median() # median

Age     28.0000
Fare    14.4542
dtype: float64

df[["Age", "Fare"]].describe() # general summary statistics

Aggregating statistics grouped by category

df[["Sex", "Age"]].groupby("Sex").mean()

# Average only the numeric columns: since pandas 2.0 the default no longer drops
# text columns such as Name or Ticket, and mean() raises TypeError on them.
df.groupby("Sex").mean(numeric_only=True)

df.groupby("Sex")["Age"].mean()

Sex
female    27.915709
male      30.726645
Name: Age, dtype: float64

df.groupby(["Sex", "Pclass"])["Fare"].mean()

Sex     Pclass
female  1         106.125798
        2          21.970121
        3          16.118810
male    1          67.226127
        2          19.741782
        3          12.661633
Name: Fare, dtype: float64

Count number of records by category

df["Pclass"].value_counts()

3    491
1    216
2    184
Name: Pclass, dtype: int64

df["Sex"].value_counts()

male      577
female    314
Name: Sex, dtype: int64

How to reshape the layout of tables?

Sort table rows

# sorted(list) - leaves the values inside the list unchanged and only returns a sorted result

df.sort_values(by="Age").head() # sorts without modifying the original
df.sort_values(by=['Pclass', 'Age'], ascending=False).head()

Long to wide table format

여성 = df[df["Sex"] == "female"]
여성.head()

여성.sort_index().groupby(["Age"]).head(5) # rows can be sorted with either sort_values or sort_index
여성.sort_index(ascending=False).groupby(["Age"]).head(5)
여성[::-1]  # slice with step -1: rows in reverse order
여성[:]  # full slice: every row

여성.pivot(index="PassengerId", columns="Pclass", values="Fare") # reshape the data

How to combine data from multiple tables?

Concatenating objects

# Three small score tables used by the concatenation examples.
# data and data3 share the same columns (math, English); data2 has different
# ones (language, science).
data = pd.DataFrame({'수학': [90, 80], '영어': [70, 60]})
data2 = pd.DataFrame({'언어': [20, 70], '과학': [30, 60]})
data3 = pd.DataFrame({'수학': [100, 90], '영어': [85, 65]})

data
data2

pd.concat([data, data2], axis=0)

# data['언어'] = data2['언어']
# data['과학'] = data2['과학']
# data[['언어', '과학']] = data2[['언어', '과학']]

data

pd.concat([data, data2], axis=1)

pd.concat([data, data3], axis=1)

pd.concat([data, data3], axis=0)

Join tables using a common identifier

# Math scores for three students, keyed by name.
data = pd.DataFrame({'이름': ['영희', '철수', '호준'], '수학': [70, 60, 90]})

# Science and language scores; '철수' has no row in this table.
data2 = pd.DataFrame(
    {'이름': ['영희', '호준'], '과학': [50, 70], '언어': [90, 60]}
)

data

# Left join on the name column: every row of `data` is kept, and names that
# are missing from `data2` get NaN in the columns coming from it.
merge = data.merge(data2, on="이름", how="left")
merge

pandas datetime

# One row per date, with the year, month and day held in separate columns.
df = pd.DataFrame(
    dict(year=[2021, 2021], month=[7, 7], day=[9, 10])
)

df

# to_datetime combines the year/month/day columns of each row into one timestamp.
data = pd.to_datetime(df)
data

0   2021-07-09
1   2021-07-10
dtype: datetime64[ns]

data.dt.year

0    2021
1    2021
dtype: int64

data.dt.month

0    7
1    7
dtype: int64

data.dt.day

0     9
1    10
dtype: int64

data.dt.weekday

0    4
1    5
dtype: int64

data.dt.day_name() # on a Series use day_name(); weekday_name() was removed in newer versions

0      Friday
1    Saturday
dtype: object

# Ask for UTC explicitly: without utc=True, pandas 2.x parses 'now' as the
# current local time, so the result would not be UTC.
pd.to_datetime('now', utc=True) # current time in UTC (timezone-aware)

Timestamp('2021-07-09 06:19:21.943875')

How to manipulate textual data?

df = pd.read_csv('train.csv')
df.head()

df["Name"].str.lower()
df["Name"].str.split(",")
df["Name"].str.contains("Mr")
df["Name"].str.contains("Mr").value_counts()
df[df["Name"].str.contains("Mr")]

# Encode the two categories as integers. map() yields an integer Series
# directly, whereas replace() on a text column relies on downcasting that is
# deprecated in recent pandas. NOTE(review): map() turns any value missing from
# the dict into NaN, while replace() would leave it unchanged - confirm the
# column only holds "male"/"female".
df["Sex"].map({"male": 1, "female": 0})

0      1
1      0
2      0
3      0
4      1
      ..
886    1
887    0
888    0
889    1
890    1
Name: Sex, Length: 891, dtype: int64

참조문헌

판다스 공식 홈페이지

	사망자수(명)
0	359042
1	337948
2	353818
3	357701
4	414366
...	...
91	280827
92	285534
93	298820
94	295132
95	305100

	Age
count	3.000000
mean	38.333333
std	18.230012
min	22.000000
25%	28.500000
50%	35.000000
75%	46.500000
max	58.000000

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	0	PC 17599	71.2833	C85	C
6	7	0	1	McCarthy, Mr. Timothy J	male	54.0	0	0	17463	51.8625	E46	S
11	12	1	1	Bonnell, Miss. Elizabeth	female	58.0	0	0	113783	26.5500	C103	S
13	14	0	3	Andersson, Mr. Anders Johan	male	39.0	1	5	347082	31.2750	NaN	S
15	16	1	2	Hewlett, Mrs. (Mary D Kingcome)	female	55.0	0	0	248706	16.0000	NaN	S
25	26	1	3	Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...	female	38.0	1	5	347077	31.3875	NaN	S
30	31	0	1	Uruchurtu, Don. Manuel E	male	40.0	0	0	PC 17601	27.7208	NaN	C
33	34	0	2	Wheadon, Mr. Edward H	male	66.0	0	0	C.A. 24579	10.5000	NaN	S
35	36	0	1	Holverson, Mr. Alexander Oskar	male	42.0	1	0	113789	52.0000	NaN	S
40	41	0	3	Ahlin, Mrs. Johan (Johanna Persdotter Larsson)	female	40.0	1	0	7546	9.4750	NaN	S

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
886	887	0	2	Montvila, Rev. Juozas	male	27.0	0	0	211536	13.00	NaN	S
887	888	1	1	Graham, Miss. Margaret Edith	female	19.0	0	0	112053	30.00	B42	S
888	889	0	3	Johnston, Miss. Catherine Helen "Carrie"	female	NaN	1	2	W./C. 6607	23.45	NaN	S
889	890	1	1	Behr, Mr. Karl Howell	male	26.0	0	0	111369	30.00	C148	C
890	891	0	3	Dooley, Mr. Patrick	male	32.0	0	0	370376	7.75	NaN	Q

	Pclass	Name	Sex
9	2	Nasser, Mrs. Nicholas (Adele Achem)	female
10	3	Sandstrom, Miss. Marguerite Rut	female
11	1	Bonnell, Miss. Elizabeth	female
12	3	Saundercock, Mr. William Henry	male
13	3	Andersson, Mr. Anders Johan	male
14	3	Vestrom, Miss. Hulda Amanda Adolfina	female
15	2	Hewlett, Mrs. (Mary D Kingcome)	female
16	3	Rice, Master. Eugene	male
17	2	Williams, Mr. Charles Eugene	male
18	3	Vander Planke, Mrs. Julius (Emelia Maria Vande...	female
19	3	Masselmani, Mrs. Fatima	female
20	2	Fynney, Mr. Joseph J	male
21	2	Beesley, Mr. Lawrence	male
22	3	McGowan, Miss. Anna "Annie"	female
23	1	Sloper, Mr. William Thompson	male
24	3	Palsson, Miss. Torborg Danira	female

	책순위	책이름	책이미지
0	1	시드 마이어	https://img.ridicdn.net/cover/194000109/large#1
1	2	한 권으로 읽는 컴퓨터 구조와 프로그래밍	https://img.ridicdn.net/cover/3649000021/large#1
2	3	개발자에서 아키텍트로	https://img.ridicdn.net/cover/443000917/large#1
3	4	컨테이너 인프라 환경 구축을 위한 쿠버네티스/도커	https://img.ridicdn.net/cover/754031863/large#1
4	5	비전공자를 위한 이해할 수 있는 IT 지식	https://img.ridicdn.net/cover/4489000001/large#1
5	6	오브젝트	https://img.ridicdn.net/cover/1160000027/large#1
6	7	Node.js 디자인 패턴 바이블	https://img.ridicdn.net/cover/194000106/large#1
7	8	눈치껏 못 배웁니다, 일센스	https://img.ridicdn.net/cover/222002588/large#1
8	9	개정판 \| 시작하세요! 도커/쿠버네티스	https://img.ridicdn.net/cover/1160000029/large#1
9	10	다재다능 코틀린 프로그래밍	https://img.ridicdn.net/cover/194000105/large#1
10	11	파이썬으로 살펴보는 아키텍처 패턴	https://img.ridicdn.net/cover/443000912/large#1
11	12	혼자 공부하는 머신러닝+딥러닝	https://img.ridicdn.net/cover/443000859/large#1
12	13	비전공자도 배워서 바로 쓰는 비즈니스 데이터 분석 입문	https://img.ridicdn.net/cover/1370000008/large#1
13	14	Let's Get IT 자바스크립트 프로그래밍	https://img.ridicdn.net/cover/754031846/large#1
14	15	개정판 \| 리액트를 다루는 기술	https://img.ridicdn.net/cover/754026976/large#1
15	16	메타버스, 이미 시작된 미래	https://img.ridicdn.net/cover/2777000044/large#1
16	17	IT 좀 아는 사람	https://img.ridicdn.net/cover/1046000113/large#1
17	18	이것이 취업을 위한 코딩 테스트다 with 파이썬	https://img.ridicdn.net/cover/443000825/large#1
18	19	모던 자바스크립트 Deep Dive	https://img.ridicdn.net/cover/1160000024/large#1
19	20	NGINX 쿡북	https://img.ridicdn.net/cover/443000914/large#1

	Age	Fare
count	714.000000	891.000000
mean	29.699118	32.204208
std	14.526497	49.693429
min	0.420000	0.000000
25%	20.125000	7.910400
50%	28.000000	14.454200
75%	38.000000	31.000000
max	80.000000	512.329200

	PassengerId	Survived	Pclass	Age	SibSp	Parch	Fare
Sex
female	431.028662	0.742038	2.159236	27.915709	0.694268	0.649682	44.479818
male	454.147314	0.188908	2.389948	30.726645	0.429809	0.235702	25.523893

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
803	804	1	3	Thomas, Master. Assad Alexander	male	0.42	0	1	2625	8.5167	NaN	C
755	756	1	2	Hamalainen, Master. Viljo	male	0.67	1	1	250649	14.5000	NaN	S
644	645	1	3	Baclini, Miss. Eugenie	female	0.75	2	1	2666	19.2583	NaN	C
469	470	1	3	Baclini, Miss. Helene Barbara	female	0.75	2	1	2666	19.2583	NaN	C
78	79	1	2	Caldwell, Master. Alden Gates	male	0.83	0	2	248738	29.0000	NaN	S

	수학	영어	언어	과학
0	90.0	70.0	NaN	NaN
1	80.0	60.0	NaN	NaN
0	NaN	NaN	20.0	30.0
1	NaN	NaN	70.0	60.0

	이름	수학
0	영희	70
1	철수	60
2	호준	90

	year	month	day
0	2021	7	9
1	2021	7	10