그래프의 한글 폰트

# Install the Nanum Korean font family so matplotlib can render Hangul labels.
!sudo apt install -y fonts-nanum*
# Rebuild the system font cache so the newly installed fonts are discoverable.
!sudo fc-cache -fv
# Delete matplotlib's font cache so it re-scans fonts and picks up Nanum.
!rm ~/.cache/matplotlib -rf
Reading package lists... Done
Building dependency tree       
Reading state information... Done
Note, selecting 'fonts-nanum-eco' for glob 'fonts-nanum*'
Note, selecting 'fonts-nanum' for glob 'fonts-nanum*'
Note, selecting 'fonts-nanum-gothic-light' for glob 'fonts-nanum*'
Note, selecting 'fonts-nanum-coding' for glob 'fonts-nanum*'
Note, selecting 'fonts-nanum-extra' for glob 'fonts-nanum*'
fonts-nanum is already the newest version (20170925-1).
fonts-nanum-coding is already the newest version (2.5-1).
fonts-nanum-eco is already the newest version (1.000-6).
fonts-nanum-extra is already the newest version (20170925-1).
0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.
/usr/share/fonts: caching, new cache contents: 0 fonts, 1 dirs
/usr/share/fonts/truetype: caching, new cache contents: 0 fonts, 3 dirs
/usr/share/fonts/truetype/humor-sans: caching, new cache contents: 1 fonts, 0 dirs
/usr/share/fonts/truetype/liberation: caching, new cache contents: 16 fonts, 0 dirs
/usr/share/fonts/truetype/nanum: caching, new cache contents: 31 fonts, 0 dirs
/usr/local/share/fonts: caching, new cache contents: 0 fonts, 0 dirs
/root/.local/share/fonts: skipping, no such directory
/root/.fonts: skipping, no such directory
/var/cache/fontconfig: cleaning cache directory
/root/.cache/fontconfig: not cleaning non-existent cache directory
/root/.fontconfig: not cleaning non-existent cache directory
fc-cache: succeeded
import matplotlib.pyplot as plt

# Default to NanumBarunGothic so Korean text in figures renders correctly.
plt.rcParams['font.family'] = 'NanumBarunGothic'

데이터 생성

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Synthetic "stock price": uniform noise in [10, 50) plus an upward trend of 2 per step.
주가 = [np.random.randint(10, 50) + 2 * i for i in range(100)]

plt.plot(np.arange(1, 101), 주가)
plt.show()

딥러닝

# Predictor (독립): time index 1..100; target (종속): the generated price series.
독립 = pd.DataFrame(np.arange(1, 101))
종속 = pd.DataFrame(주가)
독립.shape, 종속.shape
((100, 1), (100, 1))
import tensorflow as tf
# Linear regression as a neural net: one input feature -> one Dense unit, no activation (y = w*x + b).
X = tf.keras.layers.Input(shape=[1]) # number of independent-variable columns
Y = tf.keras.layers.Dense(1)(X) # number of dependent-variable columns
model = tf.keras.models.Model(X, Y)
model.compile(loss='mse') # MSE(Mean squared error)
# Long silent training run first, then 10 verbose epochs to display the final loss.
model.fit(독립, 종속, epochs=10000, verbose=0)
model.fit(독립, 종속, epochs=10)
Epoch 1/10
4/4 [==============================] - 0s 3ms/step - loss: 118.4112
Epoch 2/10
4/4 [==============================] - 0s 3ms/step - loss: 118.4161
Epoch 3/10
4/4 [==============================] - 0s 2ms/step - loss: 118.6262
Epoch 4/10
4/4 [==============================] - 0s 3ms/step - loss: 118.4688
Epoch 5/10
4/4 [==============================] - 0s 3ms/step - loss: 118.4280
Epoch 6/10
4/4 [==============================] - 0s 3ms/step - loss: 118.4940
Epoch 7/10
4/4 [==============================] - 0s 3ms/step - loss: 118.4082
Epoch 8/10
4/4 [==============================] - 0s 5ms/step - loss: 118.4647
Epoch 9/10
4/4 [==============================] - 0s 3ms/step - loss: 118.3927
Epoch 10/10
4/4 [==============================] - 0s 3ms/step - loss: 118.3953
<tensorflow.python.keras.callbacks.History at 0x7f9ca8374110>
model.predict([50])  # predicted price at time step 50
model.predict([40])  # predicted price at time step 40 (output shown below)
array([[104.776886]], dtype=float32)
model.predict(독립)  # predictions for every time step 1..100
array([[ 25.306517],
       [ 27.344217],
       [ 29.38192 ],
       [ 31.41962 ],
       [ 33.45732 ],
       [ 35.495026],
       [ 37.532726],
       [ 39.570427],
       [ 41.60813 ],
       [ 43.645832],
       [ 45.683533],
       [ 47.721237],
       [ 49.758938],
       [ 51.79664 ],
       [ 53.834343],
       [ 55.872044],
       [ 57.909744],
       [ 59.94745 ],
       [ 61.98515 ],
       [ 64.02285 ],
       [ 66.060555],
       [ 68.09825 ],
       [ 70.135956],
       [ 72.17366 ],
       [ 74.211365],
       [ 76.24906 ],
       [ 78.28676 ],
       [ 80.32446 ],
       [ 82.36217 ],
       [ 84.39987 ],
       [ 86.43758 ],
       [ 88.47527 ],
       [ 90.51298 ],
       [ 92.550674],
       [ 94.58838 ],
       [ 96.62608 ],
       [ 98.66378 ],
       [100.701485],
       [102.73919 ],
       [104.776886],
       [106.81459 ],
       [108.852295],
       [110.88999 ],
       [112.9277  ],
       [114.9654  ],
       [117.0031  ],
       [119.0408  ],
       [121.07851 ],
       [123.1162  ],
       [125.15391 ],
       [127.191605],
       [129.22931 ],
       [131.26701 ],
       [133.30472 ],
       [135.3424  ],
       [137.38013 ],
       [139.41782 ],
       [141.45552 ],
       [143.49323 ],
       [145.53091 ],
       [147.56863 ],
       [149.60632 ],
       [151.64403 ],
       [153.68173 ],
       [155.71944 ],
       [157.75714 ],
       [159.79483 ],
       [161.83253 ],
       [163.87024 ],
       [165.90794 ],
       [167.94565 ],
       [169.98335 ],
       [172.02104 ],
       [174.05875 ],
       [176.09645 ],
       [178.13416 ],
       [180.17186 ],
       [182.20956 ],
       [184.24725 ],
       [186.28496 ],
       [188.32266 ],
       [190.36037 ],
       [192.39807 ],
       [194.43578 ],
       [196.47346 ],
       [198.51117 ],
       [200.54887 ],
       [202.58658 ],
       [204.62428 ],
       [206.66199 ],
       [208.69968 ],
       [210.73738 ],
       [212.77509 ],
       [214.81279 ],
       [216.8505  ],
       [218.8882  ],
       [220.92589 ],
       [222.9636  ],
       [225.0013  ],
       [227.039   ]], dtype=float32)
model.get_weights()  # [Dense kernel w, bias b]
[array([[2.0377018]], dtype=float32), array([23.268814], dtype=float32)]
# Manual y = w*x + b check using the trained weights from model.get_weights()
# (w = 2.0377018, b = 23.268814). The original cell used stale constants
# (2.072317, 22.033388) that contradict both the printed weights and
# model.predict([50]) ≈ 125.15391 below.
2.0377018 * 50 + 23.268814
125.153904
model.predict([50])  # should agree with the manual w*x + b computation
array([[125.15391]], dtype=float32)
# Overlay the data with the fitted line y = w*x + b, reading w and b straight
# from the trained model instead of hand-copied rounded constants (2.07, 22.03),
# which had drifted from the actually learned weights (≈2.0377, ≈23.2688).
w, b = (arr.item() for arr in model.get_weights())
plt.plot(np.arange(1, 101), 주가)
plt.plot(np.arange(1, 101), w * np.arange(1, 101) + b)

plt.show()
# Residuals: actual target minus model prediction, row by row.
오차값 = 종속 - model.predict(독립)
오차값
0
0 3.693483
1 13.655783
2 8.618080
3 -10.419621
4 -1.457321
... ...
95 5.111801
96 3.074112
97 7.036407
98 -12.001297
99 12.960999

100 rows × 1 columns

# Element-wise squared residuals.
오차값의제곱 = 오차값 ** 2
오차값의제곱
0
0 13.641819
1 186.480401
2 74.271305
3 108.568492
4 2.123785
... ...
95 26.130511
96 9.450164
97 49.511030
98 144.031130
99 167.987483

100 rows × 1 columns

# MSE = mean of the squared residuals; .mean() avoids hard-coding the
# row count (100) and stays correct for any series length.
오차값의제곱.mean() ## MSE
0    118.389643
dtype: float64

최소 제곱법

  • y = ax + b
  • 참고자료 : https://ko.wikipedia.org/wiki/%EC%B5%9C%EC%86%8C%EC%A0%9C%EA%B3%B1%EB%B2%95
  • 구하고자 하는 방정식은 y = ax + b이다. 미지의 상수 a, b는 관측 데이터로부터 다음 식으로 계산할 수 있다.
  • a : $${\displaystyle a={\frac {n\Sigma XY-\Sigma X\Sigma Y}{n\Sigma X^{2}-\Sigma X\Sigma X}}}$$

  • b : $${\displaystyle b={\frac {\Sigma X^{2}\Sigma Y-\Sigma X\Sigma XY}{n\Sigma X^{2}-\Sigma X\Sigma X}}}$$

두수의곱 = 독립*종속
int(100 * 두수의곱.sum())
80715800
int(독립.sum() * 종속.sum())
64054200
int(100 * (독립 ** 2).sum())
33835000
int(독립.sum() * 독립.sum())
25502500
분자 = int(100 * 두수의곱.sum()) - int(독립.sum() * 종속.sum())
분모 = int(100 * (독립 ** 2).sum()) - int(독립.sum() * 독립.sum())
분자 / 분모
1.9995919591959197
model.get_weights()
[array([[2.0377018]], dtype=float32), array([23.268814], dtype=float32)]

${\displaystyle b={\frac {\Sigma X^{2}\Sigma Y-\Sigma X\Sigma XY}{n\Sigma X^{2}-\Sigma X\Sigma X}}}$

# Least-squares intercept b = (ΣX²·ΣY − ΣX·ΣXY) / (nΣX² − (ΣX)²), n = 100.
분자 = ((독립**2).sum() * 종속.sum()) - (독립.sum() * (독립*종속).sum())
분모 = (100 * (독립**2).sum()) - (독립.sum() * 독립.sum())
분자 / 분모  # closed-form intercept estimate (compare with the model's learned bias)
0    25.860606
dtype: float64

단순데이터의 히든레이어

# Toy dataset: six periods of revenue (매출액), ad spend (광고액), and profit (순익).
매출액 = [2, 5, 10, 20, 40, 80]
광고액 = [1, 2, 4, 6, 8, 10]
순익 = [1, 1.5, 3, 10, 20, 60]
plt.plot(np.arange(1, 7), 매출액, label='매출액')
plt.plot(np.arange(1, 7), 광고액, label='광고액')
plt.plot(np.arange(1, 7), 순익, label='순익')

plt.legend()
plt.show()
# Predictors: revenue and ad spend; target: profit.
독립 = pd.DataFrame({
    '매출액' : 매출액,
    '광고액' : 광고액
})
종속 = pd.DataFrame({
    '순익' : 순익
})
독립.shape, 종속.shape
((6, 2), (6, 1))
독립
매출액 광고액
0 2 1
1 5 2
2 10 4
3 20 6
4 40 8
5 80 10
X = tf.keras.layers.Input(shape=[2]) # number of independent-variable columns
## 1. Increase the hidden layer's node (neuron) count from 2 up to 5 and compare the plots.
H = tf.keras.layers.Dense(2, activation='swish')(X)
## 2. Increase the number of hidden layers to 2, then 3, and compare the plots.
# H = tf.keras.layers.Dense(2, activation='swish')(H)
Y = tf.keras.layers.Dense(1)(H) # number of dependent-variable columns
model = tf.keras.models.Model(X, Y)
model.compile(loss='mse') # MSE(Mean squared error)
model.fit(독립, 종속, epochs=10000, verbose=0)
model.fit(독립, 종속, epochs=10)
Epoch 1/10
1/1 [==============================] - 0s 5ms/step - loss: 36.9067
Epoch 2/10
1/1 [==============================] - 0s 7ms/step - loss: 36.8933
Epoch 3/10
1/1 [==============================] - 0s 5ms/step - loss: 36.8940
Epoch 4/10
1/1 [==============================] - 0s 6ms/step - loss: 36.8806
Epoch 5/10
1/1 [==============================] - 0s 5ms/step - loss: 36.8812
Epoch 6/10
1/1 [==============================] - 0s 5ms/step - loss: 36.8679
Epoch 7/10
1/1 [==============================] - 0s 6ms/step - loss: 36.8685
Epoch 8/10
1/1 [==============================] - 0s 5ms/step - loss: 36.8551
Epoch 9/10
1/1 [==============================] - 0s 6ms/step - loss: 36.8558
Epoch 10/10
1/1 [==============================] - 0s 6ms/step - loss: 36.8424
<tensorflow.python.keras.callbacks.History at 0x7f9c9f410a10>
model.get_weights()  # [hidden kernel (2x2), hidden bias (2,), output kernel (2x1), output bias (1,)]
[array([[ 0.03463789, -0.05114527],
        [-0.4278318 ,  0.17921998]], dtype=float32),
 array([0.23633954, 1.2595593 ], dtype=float32),
 array([[-29.92225 ],
        [-29.651735]], dtype=float32),
 array([29.98308], dtype=float32)]
model.predict([[10, 4]])
array([[2.9516315]], dtype=float32)
model.predict(독립)
array([[ 0.31713486],
       [ 3.0344315 ],
       [ 2.9516315 ],
       [ 7.3105507 ],
       [25.017687  ],
       [46.368225  ]], dtype=float32)
종속
순익
0 1.0
1 1.5
2 3.0
3 10.0
4 20.0
5 60.0
# Compare model predictions against the actual targets over the six periods.
plt.plot(np.arange(1, 7), model.predict(독립), label='예측값')
plt.plot(np.arange(1, 7), 종속, label='실제값')

plt.legend()
plt.show()

복잡 데이터의 히든레이어

  • 그럼 주가는 잘 맞출까? (곡선 형태)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Synthetic series (100 points each):
#  - 광고액: uniform noise plus a log-shaped growth curve
#  - 계절성: sine seasonality + linear trend + dependence on 광고액
#  - 매출액: slow super-linear growth in i plus 계절성
# The exponent log(log(i)) is guarded at i == 1, where log(log(1)) = log(0)
# emitted a divide-by-zero RuntimeWarning; the value there is 1.0 either way
# (1 ** -inf == 1.0), so the guard preserves the data exactly.
광고액 = [ np.random.randint(10, 50) + np.log(i*5) * 50 for i in range(1, 101) ]
계절성 = [ np.sin(i/3)*100 + i*3 + j*2 for i, j in zip(np.arange(1, 101), 광고액) ]
매출액 = [ (i**(np.log(np.log(i))) if i > 1 else 1.0) + j for i, j in zip(np.arange(1, 101), 계절성) ]

# Visualize the three synthetic series together.
plt.plot(np.arange(1, 101), 광고액, label='a')
plt.plot(np.arange(1, 101), 계절성, label='b')
plt.plot(np.arange(1, 101), 매출액, label='c')

plt.legend()
plt.show()
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:7: RuntimeWarning: divide by zero encountered in log
  import sys
# Predictors: seasonality and ad spend; target: revenue.
독립 = pd.DataFrame({
    '계절성' : 계절성,
    '광고액' : 광고액
})
종속 = pd.DataFrame({
    '매출액' : 매출액
})
독립.shape, 종속.shape
((100, 2), (100, 1))
import tensorflow as tf

# Model setup: one wide hidden layer.
X = tf.keras.layers.Input(shape=[2]) # number of independent-variable columns
H = tf.keras.layers.Dense(200, activation='swish')(X) # grow the node count gradually (2 ~ 200)
# H = tf.keras.layers.Dense(5, activation='swish')(H) # keep commented out at first!
Y = tf.keras.layers.Dense(1)(H) # number of dependent-variable columns
model = tf.keras.models.Model(X, Y)
model.compile(loss='mse') # MSE(Mean squared error)
model.fit(독립, 종속, epochs=10000, verbose=0)
model.fit(독립, 종속, epochs=10)
Epoch 1/10
4/4 [==============================] - 0s 3ms/step - loss: 31030.5703
Epoch 2/10
4/4 [==============================] - 0s 5ms/step - loss: 32710.4570
Epoch 3/10
4/4 [==============================] - 0s 3ms/step - loss: 31348.4434
Epoch 4/10
4/4 [==============================] - 0s 3ms/step - loss: 41892.3516
Epoch 5/10
4/4 [==============================] - 0s 3ms/step - loss: 37597.6719
Epoch 6/10
4/4 [==============================] - 0s 3ms/step - loss: 35604.6719
Epoch 7/10
4/4 [==============================] - 0s 3ms/step - loss: 33204.0156
Epoch 8/10
4/4 [==============================] - 0s 3ms/step - loss: 36819.3398
Epoch 9/10
4/4 [==============================] - 0s 5ms/step - loss: 43219.9883
Epoch 10/10
4/4 [==============================] - 0s 3ms/step - loss: 31084.8828
<tensorflow.python.keras.callbacks.History at 0x7f9c92f4c490>
# Compare predictions against actual values over all 100 steps.
plt.plot(np.arange(1, 101), model.predict(독립), label='예측값')
plt.plot(np.arange(1, 101), 종속, label='실제값')

plt.legend()
plt.show()
# NOTE(review): the five lines below repeat the plot above verbatim — likely a
# duplicated (re-run) notebook cell; one copy can safely be removed.
plt.plot(np.arange(1, 101), model.predict(독립), label='예측값')
plt.plot(np.arange(1, 101), 종속, label='실제값')

plt.legend()
plt.show()
import tensorflow as tf

# Model setup: five stacked hidden layers of 5 swish-activated nodes each.
X = tf.keras.layers.Input(shape=[2]) # number of independent-variable columns
H = tf.keras.layers.Dense(5, activation='swish')(X) # grow the node count gradually (2 ~ 5)
H = tf.keras.layers.Dense(5, activation='swish')(H) # grow gradually (2 ~ 5)
H = tf.keras.layers.Dense(5, activation='swish')(H) # grow gradually (2 ~ 5)
H = tf.keras.layers.Dense(5, activation='swish')(H) # grow gradually (2 ~ 5)
H = tf.keras.layers.Dense(5, activation='swish')(H) # grow gradually (2 ~ 5)
Y = tf.keras.layers.Dense(1)(H) # number of dependent-variable columns
model = tf.keras.models.Model(X, Y)
model.compile(loss='mse') # MSE(Mean squared error)
model.fit(독립, 종속, epochs=10000, verbose=0)
model.fit(독립, 종속, epochs=10)
Epoch 1/10
4/4 [==============================] - 0s 4ms/step - loss: 17272.0957
Epoch 2/10
4/4 [==============================] - 0s 4ms/step - loss: 43156.6562
Epoch 3/10
4/4 [==============================] - 0s 3ms/step - loss: 16989.7070
Epoch 4/10
4/4 [==============================] - 0s 5ms/step - loss: 17889.2598
Epoch 5/10
4/4 [==============================] - 0s 3ms/step - loss: 17336.5117
Epoch 6/10
4/4 [==============================] - 0s 6ms/step - loss: 21185.4141
Epoch 7/10
4/4 [==============================] - 0s 5ms/step - loss: 16363.0391
Epoch 8/10
4/4 [==============================] - 0s 3ms/step - loss: 23066.0176
Epoch 9/10
4/4 [==============================] - 0s 3ms/step - loss: 22610.3809
Epoch 10/10
4/4 [==============================] - 0s 3ms/step - loss: 29358.0098
<tensorflow.python.keras.callbacks.History at 0x7f9c98ca82d0>
# Compare the deeper model's predictions against the actual values.
plt.plot(np.arange(1, 101), model.predict(독립), label='예측값')
plt.plot(np.arange(1, 101), 종속, label='실제값')

plt.legend()
plt.show()
import tensorflow as tf

# Model setup: same idea as above, but the hidden layers are stacked with a
# loop — 1 + 10 = 11 hidden layers of 5 swish nodes each.
X = tf.keras.layers.Input(shape=[2]) # number of independent-variable columns
H = tf.keras.layers.Dense(5, activation='swish')(X) # grow the node count gradually (2 ~ 5)
for _ in range(10):
    H = tf.keras.layers.Dense(5, activation='swish')(H) # 10 more identical hidden layers
Y = tf.keras.layers.Dense(1)(H) # number of dependent-variable columns
model = tf.keras.models.Model(X, Y)
model.compile(loss='mse') # MSE(Mean squared error)
model.fit(독립, 종속, epochs=10000, verbose=0)
model.fit(독립, 종속, epochs=10)
Epoch 1/10
4/4 [==============================] - 0s 4ms/step - loss: 23658.7246
Epoch 2/10
4/4 [==============================] - 0s 3ms/step - loss: 23833.8301
Epoch 3/10
4/4 [==============================] - 0s 4ms/step - loss: 31947.2129
Epoch 4/10
4/4 [==============================] - 0s 4ms/step - loss: 25460.2480
Epoch 5/10
4/4 [==============================] - 0s 4ms/step - loss: 24909.1680
Epoch 6/10
4/4 [==============================] - 0s 4ms/step - loss: 24867.2402
Epoch 7/10
4/4 [==============================] - 0s 3ms/step - loss: 23354.0352
Epoch 8/10
4/4 [==============================] - 0s 3ms/step - loss: 26266.6719
Epoch 9/10
4/4 [==============================] - 0s 3ms/step - loss: 23356.0391
Epoch 10/10
4/4 [==============================] - 0s 3ms/step - loss: 24691.1309
<tensorflow.python.keras.callbacks.History at 0x7f9c93809fd0>
# Compare the 11-hidden-layer model's predictions against the actual values.
plt.plot(np.arange(1, 101), model.predict(독립), label='예측값')
plt.plot(np.arange(1, 101), 종속, label='실제값')

plt.legend()
plt.show()
  • 히든레이어는 1개 ~ 4개를 쌓아보고 정확도 측정하는 것이 보통.
  • 노드는 100개 ~ 200개를 쌓아보고 정확도 측정.
  • CNN이나 RNN 으로 넘어가기 전 좀 더 단순한 데이터로 신경망에 대해 학습할 것을 권함.