데이터 시각화 기초 문법 노트북
2021년 여름방학 데이터 분석을 위한 시각화 기초 문법
- 출처 : 데이터 사이언스 스쿨 (데이터 시각화)
# Install every apt package whose name matches fonts-nanum* (Korean Nanum fonts).
!sudo apt install -y fonts-nanum*
# Force (-f) a verbose (-v) rebuild of the system font cache.
!sudo fc-cache -fv
# Remove matplotlib's cache directory, presumably so it rescans the installed fonts.
# NOTE(review): on hosted notebooks the runtime may need a restart afterwards - confirm.
!rm ~/.cache/matplotlib -rf
import matplotlib.pyplot as plt
# Select a Korean-capable font for all text drawn by matplotlib.
plt.rc('font', family='NanumBarunGothic')
# Korean fonts typically lack the Unicode minus sign (U+2212), so negative
# tick labels would render as empty boxes; fall back to the ASCII hyphen.
plt.rc('axes', unicode_minus=False)

# Smoke test: a simple line plot whose title and axis labels are in Korean.
plt.title('한글 제목')
plt.plot([10, 20, 30, 40], [1, 4, 9, 16])
plt.xlabel("엑스축 라벨")
plt.ylabel("와이축 라벨")
plt.show()
import numpy as np
import pandas as pd
# Sample points 0.0, 0.2, ..., 4.8 shared by all three curves.
t = np.arange(0.0, 5.0, 0.2)

plt.title("라인 플롯에서 여러개의 선 그리기")
# A single plot() call accepts repeated (x, y, format-string) triples.
plt.plot(
    t, t, 'r--',             # linear:    red dashed line
    t, 0.5 * t**2, 'bs:',    # quadratic: blue squares, dotted line
    t, 0.2 * t**3, 'g^-',    # cubic:     green triangles, solid line
)
plt.show()
# 256 evenly spaced samples over [-pi, pi] and the two curves to compare.
X = np.linspace(-np.pi, np.pi, 256)
C = np.cos(X)
S = np.sin(X)

plt.title("legend를 표시한 플롯")
# The label= text of each line is what the legend displays.
plt.plot(X, C, linestyle="--", label="cosine")
plt.plot(X, S, linestyle=":", label="sine")
# Exercise: try loc=1, 2, 3 and 4 to move the legend between corners.
plt.legend(loc=4)
plt.show()
# Recompute the sample grid and both curves so this cell runs on its own.
X = np.linspace(-np.pi, np.pi, 256)
C = np.cos(X)
S = np.sin(X)  # computed but not drawn in this cell

# Single cosine curve with axis labels and a title.
plt.plot(X, C, label="cosine")
plt.title("Cosine Plot")
plt.xlabel("time")
plt.ylabel("amplitude")
plt.show()
import matplotlib as mpl
import matplotlib.pylab as plt
# Bar heights, their integer positions 0..len(y)-1, and the category names
# shown under each bar.
y = [2, 3, 1]
x = np.arange(len(y))
xlabel = ['가', '나', '다']

plt.title("Bar Chart")
# Vertical bars; swap in plt.barh for horizontal bars.
plt.bar(x, y)
# Replace the numeric x ticks with the category names.
plt.xticks(x, xlabel)
# Put y ticks only at the bar heights that actually occur.
plt.yticks(sorted(y))
plt.ylabel("빈도 수")
plt.xlabel("가나다")
plt.show()
# Pie chart of four categories; sizes are the relative wedge weights.
labels = ['개구리', '돼지', '개', '통나무']
sizes = [15, 30, 45, 10]
colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']
# Offset of each wedge from the centre: only the second wedge is pulled out.
explode = (0, 0.1, 0, 0)

plt.title("Pie Chart")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=90)
# Equal axis scaling keeps the pie circular. One call after drawing is
# enough; the extra call that preceded the data setup was redundant.
plt.axis('equal')
plt.show()
# Simulate 100 rolls of a six-sided die. randint's upper bound is exclusive,
# so randint(1, 7) yields integers 1..6.
data = [np.random.randint(1, 7) for _ in range(100)]
data

# The same construction written as an explicit loop, for comparison with the
# comprehension above.
data = []
for _ in range(100):
    data.append(np.random.randint(1, 7))
# Fix the seed so the sample is reproducible.
np.random.seed(0)
# Draw all 100000 die rolls (integers 1..6; upper bound exclusive) in one
# vectorized call instead of 100000 separate Python-level calls.
data = np.random.randint(1, 7, size=100000)
# Histogram of the die rolls. With values 1..6 split into 11 equal-width
# bins, each face lands in every other bin, so the bars appear with gaps.
# hist() returns the per-bin counts, the bin edges and the drawn patches.
arrays, bins, patches = plt.hist(data, bins=11)
plt.title("Histogram")
plt.show()
# Reproducible sample: 100 points with independent standard-normal x and y.
np.random.seed(0)
X = np.random.normal(loc=0, scale=1, size=100)
Y = np.random.normal(loc=0, scale=1, size=100)

plt.title("Scatter Plot")
plt.scatter(X, Y)
plt.show()
# Number of bubbles.
N = 30

# Reproducible uniform samples in [0, 1): positions (x, y1) and a colour
# value (y2) for each bubble.
np.random.seed(0)
x = np.random.rand(N)
y1 = np.random.rand(N)
y2 = np.random.rand(N)
# Marker sizes computed as pi * r**2 with r uniform in [0, 15).
y3 = np.pi * (15 * np.random.rand(N)) ** 2

plt.title("Bubble Chart")
# s= sets the marker size and c= the marker colour.
plt.scatter(x, y1, s=y3, c=y2)
plt.show()
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset.
digits = load_digits()
# Take the first entry of the dataset's image collection.
X = digits.images[0]
# Bare expression: shows the raw values when it is the last line of a notebook cell.
X
# Draw the digit as an image: no pixel interpolation, reversed "bone"
# colormap.
plt.title("mnist digits; 0")
plt.imshow(X, cmap=plt.cm.bone_r, interpolation='nearest')
# Strip ticks and grid lines so only the image is visible.
plt.grid(False)
plt.xticks([])
plt.yticks([])
# Shrink the axes to the central 30% of the figure in each direction.
plt.subplots_adjust(left=0.35, right=0.65, bottom=0.35, top=0.65)
plt.show()
import numpy as np
from skimage import io
import matplotlib.pyplot as plt
# Read jeju.jpg from the current working directory with skimage.io.
jeju = io.imread('jeju.jpg')
# Inspect what was loaded: its type, its dimensions, and the raw values.
type(jeju)
jeju.shape
jeju
# Display the image.
plt.imshow(jeju)
# The integers 1..10, used to demonstrate slice steps.
l = list(range(1, 11))
# A step of -1 walks the list backwards, giving a reversed copy.
l[::-1]
# A step of 2 keeps every second element, starting from the first.
l[::2]
# Apply the same [::-1] slice to the image: reverses its first axis
# (presumably the rows, i.e. an upside-down image - confirm).
plt.imshow(jeju[::-1])
# A 3x3 nested list and the NumPy array built from it.
l = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
data = np.asarray(l)

# Boolean mask: the elements greater than 5, as a flat array.
data[data > 5]
# All rows, column index 2 (the last column).
data[:, 2]
# Row index 2 (the last row), all columns.
data[2, :]
# All rows, with the column order reversed.
data[:, ::-1]
# Reverse the second axis of the image (presumably the columns, i.e. a
# left-right mirror - confirm).
plt.imshow(jeju[:, ::-1])
# Crop: indices 800-1199 along the first axis and 700-1149 along the second.
plt.imshow(jeju[800:1200, 700:1150])
# Downsample by keeping every 5th, 10th and 50th element along the first two
# axes; larger steps give a coarser image.
plt.imshow(jeju[::5, ::5])
plt.imshow(jeju[::10, ::10])
plt.imshow(jeju[::50, ::50])
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load seaborn's "titanic" example dataset.
df = sns.load_dataset("titanic")
# Bare expression: shows the table when it is the last line of a notebook cell.
df
# Print the summary produced by df.info().
df.info()
# One bar per distinct value of the "class" column, showing how many rows
# carry that value.
sns.countplot(data=df, x="class")
plt.title("타이타닉호의 각 클래스별, 승객 수")
plt.show()
# One bar per distinct value of the "alive" column, showing how many rows
# carry that value.
sns.countplot(x="alive", data=df)
# The title previously repeated the per-class caption from the plot before
# it; it now describes the column that is actually counted.
plt.title("타이타닉호의 생존 여부별 승객 수")
plt.show()
# Joint plot of "fare" against "alive" with the default plot kind.
# NOTE(review): "alive" is presumably a yes/no label column and "test" looks
# like a placeholder title - confirm both are intended.
sns.jointplot(data=df, x="fare", y="alive")
# suptitle titles the whole figure; y=1.02 places it slightly above the top edge.
plt.suptitle("test", y=1.02)
plt.show()

# Joint plot of "pclass" against "survived", drawn as kernel density estimates.
sns.jointplot(data=df, x="pclass", y="survived", kind="kde")
plt.suptitle("Kernel Density Plot", y=1.02)
plt.show()
# Grid of pairwise plots over the columns of df.
sns.pairplot(df)
# pairplot draws a grid of subplots, so plt.title would label only the
# current (last) subplot. Use a figure-level title instead, placed slightly
# above the grid as in the joint-plot cells.
plt.suptitle("Pair Plot", y=1.02)
plt.show()
# Bar plot of "fare" for each "pclass" value.
sns.barplot(data=df, x="pclass", y="fare")
# NOTE(review): the title is an empty string - presumably a blank left for
# the reader to fill in; confirm.
plt.title("")
plt.show()

# Violin plot of the same columns, showing the spread of "fare" within each
# "pclass" value.
sns.violinplot(data=df, x="pclass", y="fare")
plt.title("등급별 운임 분포")
plt.show()